Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,92 @@
|
|
1 |
import streamlit as st
|
2 |
import requests
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
# Streamlit app configuration
|
6 |
st.set_page_config(page_title='Phishing URL Detection', layout='centered')
|
@@ -17,22 +103,26 @@ st.title('π Phishing URL Detection App')
|
|
17 |
st.write('Enter a URL to check if it is Phishing or Legitimate.')
|
18 |
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
# Input URL
|
21 |
url_input = st.text_input('Enter URL:', '')
|
22 |
|
23 |
-
# Hugging Face model endpoint
|
24 |
-
API_URL = 'https://huggingface.co/ayeshaishaq004/website-url-classifier/resolve/main/phishing_model.pkl'
|
25 |
-
|
26 |
if st.button('Check URL'):
|
27 |
if url_input:
|
28 |
try:
|
29 |
-
#
|
30 |
-
|
31 |
-
prediction = response.json().get('prediction', 'Error: Could not get prediction')
|
32 |
|
33 |
-
if
|
34 |
st.error('π¨ This URL is likely a **Phishing Site**. Be careful!')
|
35 |
-
elif
|
36 |
st.success('β
This URL is likely **Legitimate**.')
|
37 |
else:
|
38 |
st.warning('β οΈ Unable to determine. Try again later.')
|
|
|
1 |
import streamlit as st
|
2 |
import requests
|
3 |
+
import pandas as pd
|
4 |
+
import socket
|
5 |
+
import whois
|
6 |
+
from urllib.parse import urlparse
|
7 |
+
from bs4 import BeautifulSoup
|
8 |
+
from datetime import datetime
|
9 |
+
import pickle
|
10 |
+
|
11 |
+
|
12 |
+
def extract_features(url):
    """Extract the 8 binary phishing indicators used by the classifier.

    Parameters
    ----------
    url : str
        Full URL to analyse (e.g. ``https://example.com/login``).

    Returns
    -------
    list[int]
        ``[SFH, popUpWindow, SSLfinal_State, Request_URL, URL_of_Anchor,
        URL_Length, age_of_domain, having_IP_Address]`` — the order must
        match the feature columns the model was trained on.
    """
    netloc = urlparse(url).netloc

    # having_IP_Address: a host written as a raw IPv4 literal is suspicious.
    # inet_aton raises OSError for anything that is not a dotted-quad.
    try:
        socket.inet_aton(netloc)
        having_IP_Address = 1
    except OSError:
        having_IP_Address = 0

    # URL_Length: URLs of 54+ characters are treated as a phishing signal.
    URL_Length = 1 if len(url) >= 54 else 0

    # Fetch the page ONCE and reuse response/soup for every content-based
    # feature. (The original fetched the same URL twice and referenced
    # `soup`/`response` in later try-blocks where a failed first fetch left
    # them undefined — the bare `except:` silently masked that NameError.)
    try:
        response = requests.get(url, timeout=5)
        soup = BeautifulSoup(response.content, "html.parser")
    except Exception:
        response = None
        soup = None

    # URL_of_Anchor: flag when more than half of the anchors point away
    # from the site (or the page could not be fetched / has no anchors).
    if soup is None:
        URL_of_Anchor = 1
    else:
        anchors = soup.find_all("a", href=True)
        if len(anchors) == 0:
            URL_of_Anchor = 1
        else:
            unsafe = [a for a in anchors if not a['href'].startswith(url)]
            URL_of_Anchor = 1 if len(unsafe) / len(anchors) > 0.5 else 0

    # age_of_domain: domains registered more than ~6 months ago score safer.
    # whois can return a list of creation dates; use the first one.
    try:
        domain_info = whois.whois(netloc)
        creation_date = domain_info.creation_date
        if isinstance(creation_date, list):
            creation_date = creation_date[0]
        age_of_domain = 1 if (datetime.now() - creation_date).days > 180 else 0
    except Exception:
        age_of_domain = 0

    # SSLfinal_State: crude HTTPS check on the scheme prefix.
    SSLfinal_State = 1 if url.startswith("https") else 0

    # Request_URL: flag when the request failed or was redirected away
    # from the URL the user entered (reuses the single fetch above).
    if response is None:
        Request_URL = 1
    else:
        Request_URL = 0 if response.url == url else 1

    # SFH (server form handler): flag forms whose action is blank or not
    # an absolute http(s) URL; no forms (or no page) also counts as a flag.
    if soup is None:
        SFH = 1
    else:
        forms = soup.find_all("form", action=True)
        if len(forms) == 0:
            SFH = 1
        else:
            for form in forms:
                if form['action'] == "about:blank" or not form['action'].startswith("http"):
                    SFH = 1
                    break
            else:
                SFH = 0

    # popUpWindow: presence of window.open in the page source suggests
    # pop-up behaviour. (Local name de-typo'd from `popUpWidnow`; the
    # feature position in the returned list is unchanged.)
    popUpWindow = 1 if response is not None and "window.open" in response.text else 0

    return [SFH, popUpWindow, SSLfinal_State, Request_URL, URL_of_Anchor, URL_Length, age_of_domain, having_IP_Address]
|
77 |
+
|
78 |
+
|
79 |
+
def predict_url(url, model, X):
    """Classify *url* with the trained model.

    Parameters
    ----------
    url : str
        URL to classify.
    model : object
        Fitted classifier exposing a ``predict`` method.
    X : object
        Object whose ``columns`` attribute gives the training feature order.

    Returns
    -------
    str
        ``"Phishing"`` for label 1, ``"Legitimate"`` for label 0,
        ``"Unknown"`` for anything else.
    """
    # Build a single-row frame so column names line up with training data.
    row = pd.DataFrame([extract_features(url)], columns=X.columns)
    label = model.predict(row)[0]
    if label == 1:
        return "Phishing"
    if label == 0:
        return "Legitimate"
    return "Unknown"
|
89 |
+
|
90 |
|
91 |
# Streamlit app configuration
|
92 |
st.set_page_config(page_title='Phishing URL Detection', layout='centered')
|
|
|
103 |
st.write('Enter a URL to check if it is Phishing or Legitimate.')
|
104 |
|
105 |
|
106 |
+
# Load the trained model
# NOTE(review): pickle.load executes arbitrary code from the file — safe only
# because these .pkl files ship with the app; never point this at user uploads.
with open('phishing_model.pkl', 'rb') as f:
    model = pickle.load(f)

# Load the feature columns
# Presumably the column index saved from the training DataFrame, used by
# predict_url to align the feature order — verify against the training script.
with open('X_columns.pkl', 'rb') as f:
    X_columns = pickle.load(f)

# Input URL
url_input = st.text_input('Enter URL:', '')
116 |
|
|
|
|
|
|
|
117 |
if st.button('Check URL'):
|
118 |
if url_input:
|
119 |
try:
|
120 |
+
# Make prediction
|
121 |
+
result = predict_url(url_input, model, X_columns)
|
|
|
122 |
|
123 |
+
if result == 'Phishing':
|
124 |
st.error('π¨ This URL is likely a **Phishing Site**. Be careful!')
|
125 |
+
elif result == 'Legitimate':
|
126 |
st.success('β
This URL is likely **Legitimate**.')
|
127 |
else:
|
128 |
st.warning('β οΈ Unable to determine. Try again later.')
|