MLDeveloper's picture
Update app.py
b093c24 verified
raw
history blame
2.04 kB
import streamlit as st
import pandas as pd
import joblib
import re
import string
# Page config
st.set_page_config(page_title="SMS Spam Detector", layout="centered")
st.title("📩 SMS Spam Detection App")
st.markdown("🔍 Enter a message below to check if it's **Spam** or **Not Spam (Ham)**")

# --- Load CSV for reference or stats ---
csv_path = "https://huggingface.co/spaces/MLDeveloper/Spam_SMS_Detection/resolve/main/spam.csv"
try:
    df = pd.read_csv(csv_path)
except Exception as e:
    # The dataset is only used for the preview further down, so a failed
    # download is reported to the user instead of stopping the app.
    st.error(f"Error loading dataset: {e}")
    # Bind an empty frame so the preview's df.head() still works; without
    # this, `df` stays undefined and the script dies with NameError.
    df = pd.DataFrame()

# --- Load trained model & vectorizer ---
# Both artifacts are read from paths relative to the working directory.
model = joblib.load("model/spam_model.pkl")  # ✅ your trained model
vectorizer = joblib.load("model/tfidf_vectorizer.pkl")  # ✅ your TF-IDF vectorizer
# --- Text Cleaning Function ---
def clean_text(text: str) -> str:
    """Normalize a raw SMS message before it is vectorized.

    Steps, in order: lowercase, drop URLs, drop @mentions and '#' signs,
    drop remaining punctuation/symbols, drop digits, drop underscores, and
    trim surrounding whitespace. Interior whitespace is left untouched.

    Args:
        text: The raw message string.

    Returns:
        The cleaned, lowercased string.
    """
    text = text.lower()
    # "http\S+" already matches https links, so no separate branch is needed.
    text = re.sub(r"http\S+|www\S+", "", text)
    # Remove whole @mentions and bare '#' signs. The previous pattern,
    # r'\@w+', only matched '@' followed by literal 'w' characters, so the
    # mention's name was left behind in the text.
    # NOTE(review): if the saved vectorizer was fitted on text cleaned with
    # the old pattern, mention names were part of its input - confirm the
    # training pipeline applies the same cleaning.
    text = re.sub(r"@\w+|#", "", text)
    text = re.sub(r"[^\w\s]", "", text)
    text = re.sub(r"\d+", "", text)
    # '_' is a word character and survives the regex above; it is part of
    # string.punctuation, so this pass removes it.
    text = text.translate(str.maketrans("", "", string.punctuation))
    return text.strip()
# --- Prediction Function ---
def predict_spam(message):
    """Classify a single message as "Spam" or "Not Spam".

    The message is cleaned with ``clean_text``, turned into features by the
    module-level ``vectorizer``, and scored by the module-level ``model``.

    Args:
        message: The raw message text.

    Returns:
        "Spam" when the first predicted label equals 1, otherwise "Not Spam".
    """
    features = vectorizer.transform([clean_text(message)])
    labels = model.predict(features)
    # Only one message was submitted, so only the first label is relevant.
    if labels[0] == 1:
        return "Spam"
    return "Not Spam"
# --- UI for prediction ---
user_input = st.text_area("✉️ Enter your SMS message here:")
if st.button("Check Message"):
    if not user_input.strip():
        # Whitespace-only input carries nothing to classify.
        st.warning("⚠️ Please enter a valid message.")
    elif predict_spam(user_input) == "Spam":
        st.error("🚫 This message is classified as **SPAM**.")
    else:
        st.success("✅ This message is classified as **NOT SPAM (HAM)**.")

# Optional: Show dataset preview
with st.expander("📄 View sample dataset (CSV)"):
    # `df` is only bound when the CSV load succeeded. Look it up defensively
    # so a failed download shows a notice instead of raising NameError.
    dataset = globals().get("df")
    if dataset is None or dataset.empty:
        st.info("Dataset preview is unavailable.")
    else:
        st.dataframe(dataset.head())

st.markdown("---")
st.markdown("🔒 **Note**: This is a demo model and not intended for production use without proper testing.")