# "Spaces: Sleeping" header lines below were a Hugging Face Spaces
# page-scrape artifact, not program text; kept as a comment so the file parses.
# Spaces: Sleeping Sleeping
# Standard library
import pickle
import socket
from datetime import datetime
from urllib.parse import urlparse

# Third-party
import pandas as pd
import requests
import streamlit as st
import whois
from bs4 import BeautifulSoup
def extract_features(url):
    """Extract the 8 binary features the phishing model was trained on.

    Parameters
    ----------
    url : str
        URL to analyse. Network calls are made (one HTTP GET, one WHOIS
        lookup); any failure degrades gracefully to a conservative default
        rather than raising.

    Returns
    -------
    list[int]
        [SFH, popUpWidnow, SSLfinal_State, Request_URL, URL_of_Anchor,
         URL_Length, age_of_domain, having_IP_Address] — the order must
        match the training columns used in predict_url.
    """
    netloc = urlparse(url).netloc

    # having_IP_Address: 1 when the host part is a literal IPv4 address.
    try:
        socket.inet_aton(netloc)
        having_IP_Address = 1
    except OSError:
        having_IP_Address = 0

    # URL_Length: URLs of 54+ characters are treated as suspicious.
    URL_Length = 1 if len(url) >= 54 else 0

    # Fetch the page ONCE and reuse response/soup below. (The original code
    # issued a second identical GET for Request_URL, and silently relied on
    # NameError + bare `except:` when this first GET failed.)
    response = None
    soup = None
    try:
        response = requests.get(url, timeout=5)
        soup = BeautifulSoup(response.content, "html.parser")
    except requests.RequestException:
        pass  # page unreachable: dependent features fall back to defaults below

    # URL_of_Anchor: suspicious when most anchors point away from this URL.
    if soup is None:
        URL_of_Anchor = 1
    else:
        anchors = soup.find_all("a", href=True)
        if not anchors:
            URL_of_Anchor = 1
        else:
            unsafe = [a for a in anchors if not a['href'].startswith(url)]
            URL_of_Anchor = 1 if len(unsafe) / len(anchors) > 0.5 else 0

    # age_of_domain: 1 when the domain is older than ~6 months.
    try:
        domain_info = whois.whois(netloc)
        creation_date = domain_info.creation_date
        if isinstance(creation_date, list):
            creation_date = creation_date[0]
        age_of_domain = 1 if (datetime.now() - creation_date).days > 180 else 0
    except Exception:
        # python-whois raises library-specific errors and may return None
        # dates (TypeError on subtraction); default to "young domain".
        age_of_domain = 0

    # SSLfinal_State: crude HTTPS check on the scheme prefix.
    SSLfinal_State = 1 if url.startswith("https") else 0

    # Request_URL: 1 when the request failed or was redirected elsewhere.
    if response is None:
        Request_URL = 1
    else:
        Request_URL = 0 if response.url == url else 1

    # SFH (Server Form Handler): blank or non-HTTP form actions are suspicious.
    if soup is None:
        SFH = 1
    else:
        forms = soup.find_all("form", action=True)
        if not forms:
            SFH = 1
        else:
            SFH = 0
            for form in forms:
                action = form['action']
                if action == "about:blank" or not action.startswith("http"):
                    SFH = 1
                    break

    # popUpWidnow (sic — the misspelling must match the training column):
    # 1 when the page script uses window.open.
    if response is not None and "window.open" in response.text:
        popUpWidnow = 1
    else:
        popUpWidnow = 0

    return [SFH, popUpWidnow, SSLfinal_State, Request_URL, URL_of_Anchor,
            URL_Length, age_of_domain, having_IP_Address]
def predict_url(url, model):
    """Classify *url* with *model* and return a human-readable verdict.

    The model is expected to emit 1 for phishing and 0 for legitimate;
    any other prediction value maps to "Unknown".
    """
    feature_names = ['SFH', 'popUpWidnow', 'SSLfinal_State', 'Request_URL',
                     'URL_of_Anchor', 'URL_Length', 'age_of_domain',
                     'having_IP_Address']
    # Single-row frame whose columns mirror the training data exactly.
    row = pd.DataFrame([extract_features(url)], columns=feature_names)
    verdict = model.predict(row)[0]
    labels = {1: "Phishing", 0: "Legitimate"}
    return labels.get(verdict, "Unknown")
# --- Streamlit app configuration ---
st.set_page_config(page_title='Phishing URL Detection', layout='centered')

# App header styling.
st.markdown("""
<style>
body { background-color: #f0f2f6; }
.main { background-color: white; padding: 2rem; border-radius: 12px; box-shadow: 0 0 10px rgba(0,0,0,0.1); }
</style>
""", unsafe_allow_html=True)

# NOTE(review): the original emoji literals were mojibake ("π", "π¨", "β",
# "β οΈ"); restored to the emoji they most plausibly encoded — confirm
# against the deployed app.
st.title('🔍 Phishing URL Detection App')
st.write('Enter a URL to check if it is Phishing or Legitimate.')

# Load the trained model. A missing or corrupt file previously crashed the
# whole app at import time; now it shows an error and halts this run.
# SECURITY: pickle.load is only acceptable because the file ships with the
# app — never load untrusted pickles.
try:
    with open('phishing_model.pkl', 'rb') as f:
        model = pickle.load(f)
except (OSError, pickle.UnpicklingError) as err:
    st.error(f'Could not load model file "phishing_model.pkl": {err}')
    st.stop()

# Input URL
url_input = st.text_input('Enter URL:', '')

if st.button('Check URL'):
    if url_input:
        try:
            # Make prediction
            result = predict_url(url_input, model)
            if result == 'Phishing':
                st.error('🚨 This URL is likely a **Phishing Site**. Be careful!')
            elif result == 'Legitimate':
                st.success('✅ This URL is likely **Legitimate**.')
            else:
                st.warning('⚠️ Unable to determine. Try again later.')
        except Exception as e:
            st.error(f'Error: {e}')
    else:
        st.warning('Please enter a valid URL.')