ayeshaishaq004 committed
Commit 526e0de Β· verified Β· 1 Parent(s): ad78801

Update app.py

Files changed (1)
  1. app.py +99 -9
app.py CHANGED
@@ -1,6 +1,92 @@
 import streamlit as st
 import requests
-from PIL import Image
+import pandas as pd
+import socket
+import whois
+from urllib.parse import urlparse
+from bs4 import BeautifulSoup
+from datetime import datetime
+import pickle
+
+
+def extract_features(url):
+    try:
+        socket.inet_aton(urlparse(url).netloc)
+        having_IP_Address = 1
+    except:
+        having_IP_Address = 0
+
+    URL_Length = 1 if len(url) >= 54 else 0
+
+    try:
+        response = requests.get(url, timeout=5)
+        soup = BeautifulSoup(response.content, "html.parser")
+        anchors = soup.find_all("a", href=True)
+        if len(anchors) == 0:
+            URL_of_Anchor = 1
+        else:
+            unsafe = [a for a in anchors if not a['href'].startswith(url)]
+            URL_of_Anchor = 1 if len(unsafe) / len(anchors) > 0.5 else 0
+    except:
+        URL_of_Anchor = 1
+
+    try:
+        domain_info = whois.whois(urlparse(url).netloc)
+        if isinstance(domain_info.creation_date, list):
+            creation_date = domain_info.creation_date[0]
+        else:
+            creation_date = domain_info.creation_date
+        age_of_domain = 1 if (datetime.now() - creation_date).days > 180 else 0
+    except:
+        age_of_domain = 0
+
+    SSLfinal_State = 1 if url.startswith("https") else 0
+
+    try:
+        request_response = requests.get(url, timeout=5)
+        if request_response.url == url:
+            Request_URL = 0
+        else:
+            Request_URL = 1
+    except:
+        Request_URL = 1
+
+    try:
+        forms = soup.find_all("form", action=True)
+        if len(forms) == 0:
+            SFH = 1
+        else:
+            for form in forms:
+                if form['action'] == "about:blank" or not form['action'].startswith("http"):
+                    SFH = 1
+                    break
+            else:
+                SFH = 0
+    except:
+        SFH = 1
+
+    try:
+        if "window.open" in response.text:
+            popUpWidnow = 1
+        else:
+            popUpWidnow = 0
+    except:
+        popUpWidnow = 0
+
+    return [SFH, popUpWidnow, SSLfinal_State, Request_URL, URL_of_Anchor, URL_Length, age_of_domain, having_IP_Address]
+
+
+def predict_url(url, model, X):
+    features = extract_features(url)
+    features_df = pd.DataFrame([features], columns=X.columns)
+    prediction = model.predict(features_df)
+    if prediction[0] == 1:
+        return "Phishing"
+    elif prediction[0] == 0:
+        return "Legitimate"
+    else:
+        return "Unknown"
+
 
 # Streamlit app configuration
 st.set_page_config(page_title='Phishing URL Detection', layout='centered')
@@ -17,22 +103,26 @@ st.title('πŸ” Phishing URL Detection App')
 st.write('Enter a URL to check if it is Phishing or Legitimate.')


+# Load the trained model
+with open('phishing_model.pkl', 'rb') as f:
+    model = pickle.load(f)
+
+# Load the feature columns
+with open('X_columns.pkl', 'rb') as f:
+    X_columns = pickle.load(f)
+
 # Input URL
 url_input = st.text_input('Enter URL:', '')

-# Hugging Face model endpoint
-API_URL = 'https://huggingface.co/ayeshaishaq004/website-url-classifier/resolve/main/phishing_model.pkl'
-
 if st.button('Check URL'):
     if url_input:
         try:
-            # Sending URL to model for prediction
-            response = requests.post(API_URL, json={'url': url_input})
-            prediction = response.json().get('prediction', 'Error: Could not get prediction')
+            # Make prediction
+            result = predict_url(url_input, model, X_columns)

-            if prediction == 'Phishing':
+            if result == 'Phishing':
                 st.error('🚨 This URL is likely a **Phishing Site**. Be careful!')
-            elif prediction == 'Legitimate':
+            elif result == 'Legitimate':
                 st.success('βœ… This URL is likely **Legitimate**.')
             else:
                 st.warning('⚠️ Unable to determine. Try again later.')
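
With this change the app stops posting the URL to a remote endpoint and instead loads two pickled artifacts that must sit next to app.py: phishing_model.pkl (the classifier) and X_columns.pkl (whatever object predict_url reads .columns from, so it has to unpickle to something DataFrame-like). The commit does not show how those files were produced; the sketch below is one plausible training-side recipe, where the classifier choice, the phishing_dataset.csv file name, the Result label column, and the feature column names are assumptions rather than anything recorded in this repository.

```python
# Hypothetical training-side sketch (not part of this commit). It shows one
# way phishing_model.pkl and X_columns.pkl could have been produced so that
# predict_url()'s use of X.columns works at inference time.
import pickle

import pandas as pd
from sklearn.ensemble import RandomForestClassifier  # assumed classifier; any sklearn model with .predict fits

# Assumed training table: one row per URL, with the same eight feature columns,
# in the same order as the list returned by extract_features(), plus a label.
df = pd.read_csv('phishing_dataset.csv')  # hypothetical file name
feature_cols = ['SFH', 'popUpWidnow', 'SSLfinal_State', 'Request_URL',
                'URL_of_Anchor', 'URL_Length', 'age_of_domain', 'having_IP_Address']
X = df[feature_cols]
y = df['Result']  # assumed label column: 1 = phishing, 0 = legitimate

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X, y)

with open('phishing_model.pkl', 'wb') as f:
    pickle.dump(model, f)

# predict_url() calls X.columns on the unpickled object, so the pickle must hold
# something with a .columns attribute; saving the feature DataFrame itself works.
with open('X_columns.pkl', 'wb') as f:
    pickle.dump(X, f)
```

Pickling the feature DataFrame itself, rather than a plain list of column names, is what keeps the X.columns call in predict_url working unchanged.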
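For a quick check outside Streamlit, the two helpers can be exercised directly. This assumes extract_features and predict_url are importable (for instance, split out of app.py into a features.py module, which the commit does not do) and that the two pickle files are in the working directory; the URLs below are placeholders.

```python
# Hypothetical smoke test for the new helpers, run outside Streamlit.
import pickle

from features import extract_features, predict_url  # assumed module split; the commit keeps both in app.py

with open('phishing_model.pkl', 'rb') as f:
    model = pickle.load(f)
with open('X_columns.pkl', 'rb') as f:
    X_columns = pickle.load(f)

for url in ['https://www.wikipedia.org', 'http://198.51.100.7/account-verify']:
    print(url, '->', extract_features(url))               # raw 8-element feature vector
    print(url, '->', predict_url(url, model, X_columns))  # 'Phishing' / 'Legitimate' / 'Unknown'
```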
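One behavioural detail worth knowing when reading extract_features: the page is fetched up to twice (once for the anchor analysis and once for Request_URL), and the later SFH and pop-up checks reuse soup and response from the first try block. If that first request fails, those names are never bound, so the bare except clauses fall back to the defaults (SFH = 1, popUpWidnow = 0) rather than crashing. A possible single-fetch arrangement, sketched here and not part of the commit:

```python
# Hypothetical refactoring sketch: fetch the page once, then derive the
# HTML-based features from that single response. Not part of this commit.
import requests
from bs4 import BeautifulSoup


def fetch_page(url, timeout=5):
    """Return (response, soup), or (None, None) when the page cannot be fetched."""
    try:
        response = requests.get(url, timeout=timeout)
        return response, BeautifulSoup(response.content, "html.parser")
    except requests.RequestException:
        return None, None


def popup_feature(response):
    # Same rule as the commit: flag pages whose HTML calls window.open.
    if response is None:
        return 0  # matches the commit's fallback when the fetch fails
    return 1 if "window.open" in response.text else 0
```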