ayeshaishaq004's picture
Update app.py
ea95415 verified
import streamlit as st
import requests
import pandas as pd
import socket
import whois
from urllib.parse import urlparse
from bs4 import BeautifulSoup
from datetime import datetime
import pickle
def extract_features(url):
    """Build the eight binary phishing-indicator features for ``url``.

    The page is fetched once (5 second timeout) and one WHOIS lookup is made
    for the URL's host.  Every feature that depends on the network or on
    parsing is best-effort: when its data cannot be obtained the feature
    falls back to a fixed default instead of raising, so unreachable or
    malformed URL strings still yield a full feature vector.

    Args:
        url: The URL to analyse, as a string.

    Returns:
        A list of eight 0/1 integers in this order: ``SFH``,
        ``popUpWidnow``, ``SSLfinal_State``, ``Request_URL``,
        ``URL_of_Anchor``, ``URL_Length``, ``age_of_domain``,
        ``having_IP_Address``.
    """
    # ``hostname`` (unlike ``netloc``) excludes any port and userinfo, so
    # "http://1.2.3.4:8080/" is still recognised as an IP-address host and
    # the WHOIS lookup receives a bare host name.
    try:
        parsed = urlparse(url)
        host = parsed.hostname or ""
        scheme = parsed.scheme.lower()
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. unbalanced brackets).
        host = ""
        scheme = ""

    # having_IP_Address: 1 when the host is a dotted IPv4 literal.
    try:
        socket.inet_aton(host)
        having_IP_Address = 1
    except (OSError, ValueError):
        having_IP_Address = 0

    # URL_Length: 1 for URLs of 54 characters or more.
    URL_Length = 1 if len(url) >= 54 else 0

    # SSLfinal_State: compare the parsed scheme so "HTTPS://..." counts too.
    SSLfinal_State = 1 if scheme == "https" else 0

    # Fetch and parse the page once; the features below share the result.
    # ``response`` stays set if only the HTML parsing step fails.
    response = None
    soup = None
    try:
        response = requests.get(url, timeout=5)
        soup = BeautifulSoup(response.content, "html.parser")
    except Exception:
        # Best-effort: dependent features use their defaults below.
        soup = None

    # URL_of_Anchor: 1 when there are no anchors, or when more than half of
    # the hrefs do not start with the analysed URL itself.
    URL_of_Anchor = 1
    if soup is not None:
        try:
            anchors = soup.find_all("a", href=True)
            if anchors:
                foreign = sum(
                    1 for a in anchors if not a["href"].startswith(url)
                )
                URL_of_Anchor = 1 if foreign / len(anchors) > 0.5 else 0
        except Exception:
            URL_of_Anchor = 1

    # age_of_domain: 1 when the WHOIS creation date is over 180 days ago.
    age_of_domain = 0
    try:
        creation_date = whois.whois(host).creation_date
        if isinstance(creation_date, list):
            creation_date = creation_date[0]
        if isinstance(creation_date, datetime):
            # Use "now" in the creation date's own timezone (or naive when
            # it is naive) so the subtraction never mixes aware and naive
            # datetimes, which would raise TypeError.
            now = datetime.now(creation_date.tzinfo)
            age_of_domain = 1 if (now - creation_date).days > 180 else 0
    except Exception:
        age_of_domain = 0

    # Request_URL: 0 only when the final URL after redirects equals the
    # input exactly; 1 otherwise or when the fetch failed.
    if response is not None and response.url == url:
        Request_URL = 0
    else:
        Request_URL = 1

    # SFH: 0 only when the page has at least one form with an action and
    # every such action starts with "http" (which also rules out
    # "about:blank"); 1 otherwise.
    SFH = 1
    if soup is not None:
        try:
            forms = soup.find_all("form", action=True)
            if forms and all(f["action"].startswith("http") for f in forms):
                SFH = 0
        except Exception:
            SFH = 1

    # popUpWidnow: 1 when the page source mentions window.open.
    try:
        if response is not None and "window.open" in response.text:
            popUpWidnow = 1
        else:
            popUpWidnow = 0
    except Exception:
        popUpWidnow = 0

    return [
        SFH,
        popUpWidnow,
        SSLfinal_State,
        Request_URL,
        URL_of_Anchor,
        URL_Length,
        age_of_domain,
        having_IP_Address,
    ]
def predict_url(url, model):
    """Classify ``url`` with ``model`` and return a human-readable label.

    The feature vector from ``extract_features`` is wrapped in a one-row
    DataFrame whose column names are given in the list below, then passed
    to ``model.predict``.

    Args:
        url: The URL to classify.
        model: Any object exposing ``predict(DataFrame)`` whose first
            result element compares equal to 1 or 0.

    Returns:
        ``"Phishing"`` for a prediction of 1, ``"Legitimate"`` for 0 and
        ``"Unknown"`` for anything else.
    """
    row_values = extract_features(url)
    # Column spellings (including 'popUpWidnow') are passed through as-is.
    feature_names = [
        'SFH',
        'popUpWidnow',
        'SSLfinal_State',
        'Request_URL',
        'URL_of_Anchor',
        'URL_Length',
        'age_of_domain',
        'having_IP_Address',
    ]
    frame = pd.DataFrame([row_values], columns=feature_names)
    label = model.predict(frame)[0]

    if label == 1:
        return "Phishing"
    if label == 0:
        return "Legitimate"
    return "Unknown"
# Streamlit app configuration
st.set_page_config(page_title='Phishing URL Detection', layout='centered')

# App header: inject a small stylesheet, then the title and instructions.
st.markdown("""
<style>
body { background-color: #f0f2f6; }
.main { background-color: white; padding: 2rem; border-radius: 12px; box-shadow: 0 0 10px rgba(0,0,0,0.1); }
</style>
""", unsafe_allow_html=True)
st.title('🔍 Phishing URL Detection App')
st.write('Enter a URL to check if it is Phishing or Legitimate.')

# Load the trained model.
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# 'phishing_model.pkl' must only ever be a file produced by a trusted source.
with open('phishing_model.pkl', 'rb') as f:
    model = pickle.load(f)

# Input URL
url_input = st.text_input('Enter URL:', '')

if st.button('Check URL'):
    # Strip surrounding whitespace so a blank or padded entry is not sent to
    # the predictor as if it were a URL.
    cleaned_url = url_input.strip()
    if cleaned_url:
        try:
            # Make prediction
            result = predict_url(cleaned_url, model)
            if result == 'Phishing':
                st.error('🚨 This URL is likely a **Phishing Site**. Be careful!')
            elif result == 'Legitimate':
                st.success('✅ This URL is likely **Legitimate**.')
            else:
                st.warning('⚠️ Unable to determine. Try again later.')
        except Exception as e:
            # Top-level UI boundary: show the failure instead of crashing.
            st.error(f'Error: {e}')
    else:
        st.warning('Please enter a valid URL.')