import streamlit as st
import requests
import pandas as pd
import socket
import whois
from urllib.parse import urlparse
from bs4 import BeautifulSoup
from datetime import datetime
import pickle
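# Third-party dependencies (pip package names, inferred from the imports):
# streamlit, requests, pandas, beautifulsoup4 (imported as 'bs4'), and most
# likely python-whois (imported as 'whois'; it provides the whois.whois() call below).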
def extract_features(url):
    # having_IP_Address: 1 if the URL's host is a raw IPv4 address
    try:
        socket.inet_aton(urlparse(url).netloc)
        having_IP_Address = 1
    except OSError:
        having_IP_Address = 0

    # URL_Length: URLs of 54+ characters are treated as suspicious
    URL_Length = 1 if len(url) >= 54 else 0

    # URL_of_Anchor: 1 if most anchors point away from the page's own URL.
    # response/soup are initialised to None so the later SFH and pop-up
    # checks can tell whether the page fetch succeeded.
    response = None
    soup = None
    try:
        response = requests.get(url, timeout=5)
        soup = BeautifulSoup(response.content, "html.parser")
        anchors = soup.find_all("a", href=True)
        if len(anchors) == 0:
            URL_of_Anchor = 1
        else:
            unsafe = [a for a in anchors if not a['href'].startswith(url)]
            URL_of_Anchor = 1 if len(unsafe) / len(anchors) > 0.5 else 0
    except requests.RequestException:
        URL_of_Anchor = 1

    # age_of_domain: 1 if the WHOIS creation date is more than ~6 months old
    try:
        domain_info = whois.whois(urlparse(url).netloc)
        creation_date = domain_info.creation_date
        if isinstance(creation_date, list):
            creation_date = creation_date[0]
        age_of_domain = 1 if (datetime.now() - creation_date).days > 180 else 0
    except Exception:
        age_of_domain = 0

    # SSLfinal_State: 1 if the URL is served over HTTPS
    SSLfinal_State = 1 if url.startswith("https") else 0

    # Request_URL: 1 if the request was redirected to a different URL
    try:
        request_response = requests.get(url, timeout=5)
        Request_URL = 0 if request_response.url == url else 1
    except requests.RequestException:
        Request_URL = 1

    # SFH (Server Form Handler): 1 if any form posts to about:blank or to a
    # non-absolute action, or if the page could not be fetched at all
    if soup is None:
        SFH = 1
    else:
        forms = soup.find_all("form", action=True)
        if len(forms) == 0:
            SFH = 1
        else:
            for form in forms:
                if form['action'] == "about:blank" or not form['action'].startswith("http"):
                    SFH = 1
                    break
            else:  # no suspicious form handler found
                SFH = 0

    # popUpWidnow: 1 if the page script opens pop-up windows
    # (the misspelling is kept deliberately; it matches the training column name)
    popUpWidnow = 1 if response is not None and "window.open" in response.text else 0

    return [SFH, popUpWidnow, SSLfinal_State, Request_URL, URL_of_Anchor,
            URL_Length, age_of_domain, having_IP_Address]
def predict_url(url, model):
    features = extract_features(url)
    # Column order must match the list returned by extract_features
    X_columns = ['SFH', 'popUpWidnow', 'SSLfinal_State', 'Request_URL',
                 'URL_of_Anchor', 'URL_Length', 'age_of_domain', 'having_IP_Address']
    features_df = pd.DataFrame([features], columns=X_columns)
    prediction = model.predict(features_df)
    if prediction[0] == 1:
        return "Phishing"
    elif prediction[0] == 0:
        return "Legitimate"
    else:
        return "Unknown"
# Streamlit app configuration
st.set_page_config(page_title='Phishing URL Detection', layout='centered')
# Page styling
st.markdown("""
<style>
body { background-color: #f0f2f6; }
.main { background-color: white; padding: 2rem; border-radius: 12px; box-shadow: 0 0 10px rgba(0,0,0,0.1); }
</style>
""", unsafe_allow_html=True)
st.title('🔍 Phishing URL Detection App')
st.write('Enter a URL to check if it is Phishing or Legitimate.')
# Load the trained model
with open('phishing_model.pkl', 'rb') as f:
    model = pickle.load(f)
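# Note: Streamlit reruns this script on every interaction, so the pickle is
# reloaded each time; wrapping the load in a function decorated with
# @st.cache_resource would load the model only once.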
# Input URL
url_input = st.text_input('Enter URL:', '')
if st.button('Check URL'):
    if url_input:
        try:
            # Make prediction
            result = predict_url(url_input, model)
            if result == 'Phishing':
                st.error('🚨 This URL is likely a **Phishing Site**. Be careful!')
            elif result == 'Legitimate':
                st.success('✅ This URL is likely **Legitimate**.')
            else:
                st.warning('⚠️ Unable to determine. Try again later.')
        except Exception as e:
            st.error(f'Error: {e}')
    else:
        st.warning('Please enter a valid URL.')