import streamlit as st
import requests
import pandas as pd
import socket
import whois
from urllib.parse import urlparse
from bs4 import BeautifulSoup
from datetime import datetime
import pickle


def extract_features(url):
    """Extract the 8 binary phishing-indicator features for *url*.

    Returns a list in the exact column order the trained model expects:
    [SFH, popUpWidnow, SSLfinal_State, Request_URL, URL_of_Anchor,
     URL_Length, age_of_domain, having_IP_Address]
    Each entry is 0/1, where 1 generally means "suspicious".
    ("popUpWidnow" is intentionally misspelled to match the training columns.)
    """
    parsed = urlparse(url)

    # having_IP_Address: 1 when the host part is a dotted-quad IPv4 literal
    # (phishing pages often use raw IPs instead of domain names).
    try:
        socket.inet_aton(parsed.netloc)
        having_IP_Address = 1
    except OSError:
        having_IP_Address = 0

    # URL_Length: URLs of 54+ characters are treated as suspicious.
    URL_Length = 1 if len(url) >= 54 else 0

    # Fetch the page ONCE and reuse it for all content-based features.
    # (Previously the same URL was fetched twice, and later sections relied
    # on a NameError when the fetch failed; now the failure is explicit.)
    response = None
    soup = None
    try:
        response = requests.get(url, timeout=5)
        soup = BeautifulSoup(response.content, "html.parser")
    except Exception:
        # Network or parse failure: content features fall back to the same
        # "suspicious"/default values the original error paths produced.
        pass

    # URL_of_Anchor: suspicious when there are no anchors at all, or when
    # more than half of the anchors point outside the page's own URL prefix.
    if soup is None:
        URL_of_Anchor = 1
    else:
        anchors = soup.find_all("a", href=True)
        if not anchors:
            URL_of_Anchor = 1
        else:
            unsafe = [a for a in anchors if not a["href"].startswith(url)]
            URL_of_Anchor = 1 if len(unsafe) / len(anchors) > 0.5 else 0

    # age_of_domain: 1 when the WHOIS creation date is older than ~6 months.
    # Any WHOIS failure (or a missing/None creation date) counts as "young".
    try:
        domain_info = whois.whois(parsed.netloc)
        creation_date = domain_info.creation_date
        if isinstance(creation_date, list):
            creation_date = creation_date[0]
        age_of_domain = 1 if (datetime.now() - creation_date).days > 180 else 0
    except Exception:
        age_of_domain = 0

    # SSLfinal_State: crude HTTPS check on the raw URL string.
    SSLfinal_State = 1 if url.startswith("https") else 0

    # Request_URL: 1 when the request failed or was redirected away from the
    # URL the user entered (response.url is the final URL after redirects).
    if response is None:
        Request_URL = 1
    else:
        Request_URL = 0 if response.url == url else 1

    # SFH (Server Form Handler): suspicious when the page is unreachable, has
    # no forms, or any form posts to "about:blank" / a non-http action.
    if soup is None:
        SFH = 1
    else:
        forms = soup.find_all("form", action=True)
        if not forms:
            SFH = 1
        else:
            for form in forms:
                action = form["action"]
                if action == "about:blank" or not action.startswith("http"):
                    SFH = 1
                    break
            else:
                SFH = 0

    # popUpWidnow: naive substring scan for JavaScript pop-up usage.
    popUpWidnow = 1 if response is not None and "window.open" in response.text else 0

    return [SFH, popUpWidnow, SSLfinal_State, Request_URL, URL_of_Anchor,
            URL_Length, age_of_domain, having_IP_Address]


def predict_url(url, model):
    """Run *model* on the features extracted from *url*.

    Returns "Phishing" for a positive (1) prediction, "Legitimate" for a
    negative (0) prediction, and "Unknown" for anything else.
    """
    feature_names = ['SFH', 'popUpWidnow', 'SSLfinal_State', 'Request_URL',
                     'URL_of_Anchor', 'URL_Length', 'age_of_domain',
                     'having_IP_Address']
    frame = pd.DataFrame([extract_features(url)], columns=feature_names)
    label = model.predict(frame)[0]
    if label == 1:
        return "Phishing"
    if label == 0:
        return "Legitimate"
    return "Unknown"


# ---- Streamlit UI ----------------------------------------------------------

# Page-level configuration must be the first Streamlit call in the script.
st.set_page_config(page_title='Phishing URL Detection', layout='centered')

# Inject a small stylesheet for the card-style layout.
st.markdown("""
    <style>
    body { background-color: #f0f2f6; }
    .main { background-color: white; padding: 2rem; border-radius: 12px; box-shadow: 0 0 10px rgba(0,0,0,0.1); }
    </style>
""", unsafe_allow_html=True)

st.title('πŸ” Phishing URL Detection App')
st.write('Enter a URL to check if it is Phishing or Legitimate.')

# Deserialize the pre-trained classifier.
# NOTE(review): pickle.load executes arbitrary code from the file — safe only
# if phishing_model.pkl ships with the app; never load user-supplied pickles.
with open('phishing_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# Input field for the URL to classify.
url_input = st.text_input('Enter URL:', '')

if st.button('Check URL'):
    if not url_input:
        st.warning('Please enter a valid URL.')
    else:
        try:
            verdict = predict_url(url_input, model)
            if verdict == 'Phishing':
                st.error('🚨 This URL is likely a **Phishing Site**. Be careful!')
            elif verdict == 'Legitimate':
                st.success('βœ… This URL is likely **Legitimate**.')
            else:
                st.warning('⚠️ Unable to determine. Try again later.')
        except Exception as exc:
            # Network/WHOIS/model failures surface here rather than crashing.
            st.error(f'Error: {exc}')