import streamlit as st
import requests
import pandas as pd
import socket
import whois
from urllib.parse import urlparse
from bs4 import BeautifulSoup
from datetime import datetime
import pickle


def extract_features(url):
    """Extract the 8 binary phishing-indicator features for *url*.

    Returns a list in the exact column order the trained model expects:
    [SFH, popUpWidnow, SSLfinal_State, Request_URL, URL_of_Anchor,
     URL_Length, age_of_domain, having_IP_Address]
    Each entry is 0/1, where 1 generally means "suspicious".
    ("popUpWidnow" is intentionally misspelled to match the training columns.)
    """
    parsed = urlparse(url)

    # having_IP_Address: 1 when the host part is a dotted-quad IPv4 literal
    # (phishing pages often use raw IPs instead of domain names).
    try:
        socket.inet_aton(parsed.netloc)
        having_IP_Address = 1
    except OSError:
        having_IP_Address = 0

    # URL_Length: URLs of 54+ characters are treated as suspicious.
    URL_Length = 1 if len(url) >= 54 else 0

    # Fetch the page ONCE and reuse it for all content-based features.
    # (Previously the same URL was fetched twice, and later sections relied
    # on a NameError when the fetch failed; now the failure is explicit.)
    response = None
    soup = None
    try:
        response = requests.get(url, timeout=5)
        soup = BeautifulSoup(response.content, "html.parser")
    except Exception:
        # Network or parse failure: content features fall back to the same
        # "suspicious"/default values the original error paths produced.
        pass

    # URL_of_Anchor: suspicious when there are no anchors at all, or when
    # more than half of the anchors point outside the page's own URL prefix.
    if soup is None:
        URL_of_Anchor = 1
    else:
        anchors = soup.find_all("a", href=True)
        if not anchors:
            URL_of_Anchor = 1
        else:
            unsafe = [a for a in anchors if not a["href"].startswith(url)]
            URL_of_Anchor = 1 if len(unsafe) / len(anchors) > 0.5 else 0

    # age_of_domain: 1 when the WHOIS creation date is older than ~6 months.
    # Any WHOIS failure (or a missing/None creation date) counts as "young".
    try:
        domain_info = whois.whois(parsed.netloc)
        creation_date = domain_info.creation_date
        if isinstance(creation_date, list):
            creation_date = creation_date[0]
        age_of_domain = 1 if (datetime.now() - creation_date).days > 180 else 0
    except Exception:
        age_of_domain = 0

    # SSLfinal_State: crude HTTPS check on the raw URL string.
    SSLfinal_State = 1 if url.startswith("https") else 0

    # Request_URL: 1 when the request failed or was redirected away from the
    # URL the user entered (response.url is the final URL after redirects).
    if response is None:
        Request_URL = 1
    else:
        Request_URL = 0 if response.url == url else 1

    # SFH (Server Form Handler): suspicious when the page is unreachable, has
    # no forms, or any form posts to "about:blank" / a non-http action.
    if soup is None:
        SFH = 1
    else:
        forms = soup.find_all("form", action=True)
        if not forms:
            SFH = 1
        else:
            for form in forms:
                action = form["action"]
                if action == "about:blank" or not action.startswith("http"):
                    SFH = 1
                    break
            else:
                SFH = 0

    # popUpWidnow: naive substring scan for JavaScript pop-up usage.
    popUpWidnow = 1 if response is not None and "window.open" in response.text else 0

    return [SFH, popUpWidnow, SSLfinal_State, Request_URL, URL_of_Anchor,
            URL_Length, age_of_domain, having_IP_Address]


def predict_url(url, model):
    """Run *model* on the features extracted from *url*.

    Returns "Phishing" for a positive (1) prediction, "Legitimate" for a
    negative (0) prediction, and "Unknown" for anything else.
    """
    feature_names = ['SFH', 'popUpWidnow', 'SSLfinal_State', 'Request_URL',
                     'URL_of_Anchor', 'URL_Length', 'age_of_domain',
                     'having_IP_Address']
    frame = pd.DataFrame([extract_features(url)], columns=feature_names)
    label = model.predict(frame)[0]
    if label == 1:
        return "Phishing"
    if label == 0:
        return "Legitimate"
    return "Unknown"


# ---- Streamlit UI ----------------------------------------------------------

# Page-level configuration must be the first Streamlit call in the script.
st.set_page_config(page_title='Phishing URL Detection', layout='centered')

# Inject a small stylesheet for the card-style layout.
st.markdown("""
    <style>
    body { background-color: #f0f2f6; }
    .main { background-color: white; padding: 2rem; border-radius: 12px; box-shadow: 0 0 10px rgba(0,0,0,0.1); }
    </style>
""", unsafe_allow_html=True)

st.title('πŸ” Phishing URL Detection App')
st.write('Enter a URL to check if it is Phishing or Legitimate.')

# Deserialize the pre-trained classifier.
# NOTE(review): pickle.load executes arbitrary code from the file — safe only
# if phishing_model.pkl ships with the app; never load user-supplied pickles.
with open('phishing_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# Input field for the URL to classify.
url_input = st.text_input('Enter URL:', '')

if st.button('Check URL'):
    if not url_input:
        st.warning('Please enter a valid URL.')
    else:
        try:
            verdict = predict_url(url_input, model)
            if verdict == 'Phishing':
                st.error('🚨 This URL is likely a **Phishing Site**. Be careful!')
            elif verdict == 'Legitimate':
                st.success('βœ… This URL is likely **Legitimate**.')
            else:
                st.warning('⚠️ Unable to determine. Try again later.')
        except Exception as exc:
            # Network/WHOIS/model failures surface here rather than crashing.
            st.error(f'Error: {exc}')