"""Streamlit app that classifies a user-entered SMS message as Spam or Not Spam.

The script loads a reference CSV (used only for the optional preview), a
trained classifier and its fitted vectorizer, then cleans the user's text
and shows the model's prediction.
"""
import re
import string

import joblib
import pandas as pd
import streamlit as st

# Page config
st.set_page_config(page_title="SMS Spam Detector", layout="centered")
st.title("📩 SMS Spam Detection App")
st.markdown("🔍 Enter a message below to check if it's **Spam** or **Not Spam (Ham)**")

# NOTE(review): Streamlit re-executes this script on every interaction, so the
# dataset download and the model load below run again each time. Consider
# st.cache_data / st.cache_resource if the deployed Streamlit version has them.

# --- Load CSV for reference or stats ---
csv_path = "https://huggingface.co/spaces/MLDeveloper/Spam_SMS_Detection/resolve/main/spam_sms_detection.csv"
try:
    df = pd.read_csv(csv_path)
except Exception as e:
    # The dataset is only needed for the optional preview, so a failure here
    # must not take the classifier down. Bind `df` so later code can test it
    # instead of hitting a NameError.
    df = None
    st.error(f"Error loading dataset: {e}")

# --- Load trained model & vectorizer ---
try:
    model = joblib.load("model/spam_model.pkl")  # trained classifier
    vectorizer = joblib.load("model/tfidf_vectorizer.pkl")  # fitted vectorizer
except Exception as e:
    # Nothing below can work without both artifacts: report and halt this run
    # rather than surfacing a raw traceback to the user.
    st.error(f"Error loading model: {e}")
    st.stop()

# Translation table that deletes every ASCII punctuation character.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


# --- Text Cleaning Function ---
def clean_text(text: str) -> str:
    """Normalize a raw message before vectorization.

    Steps, in order: lowercase, drop URL-like tokens, drop ``#`` (and ``@``
    followed by ``w`` characters), drop every non-word/non-space character,
    drop digit runs, drop remaining ASCII punctuation, strip outer whitespace.

    Args:
        text: The raw message.

    Returns:
        The cleaned message; may be an empty string.
    """
    text = text.lower()
    text = re.sub(r"http\S+|www\S+|https\S+", '', text, flags=re.MULTILINE)
    # NOTE(review): r'\@w+' matches "@" followed by literal "w" characters; it
    # was probably meant to be r'@\w+' (strip @mentions). Left unchanged on
    # purpose: the vectorizer was presumably fitted on text cleaned this way,
    # so changing it without retraining would skew inference. Confirm against
    # the training script.
    text = re.sub(r'\@w+|\#', '', text)
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\d+', '', text)
    # "_" counts as a word character, so it survives the [^\w\s] pass above;
    # this translate step is what removes it.
    text = text.translate(_PUNCTUATION_TABLE)
    return text.strip()


# --- Prediction Function ---
def predict_spam(message: str) -> str:
    """Classify *message* with the loaded model.

    Args:
        message: The raw message as typed by the user.

    Returns:
        ``"Spam"`` when the model's first prediction equals ``1``, otherwise
        ``"Not Spam"``.
    """
    cleaned = clean_text(message)
    vector = vectorizer.transform([cleaned])
    prediction = model.predict(vector)
    # Assumes the model was trained with label 1 == spam — TODO confirm.
    return "Spam" if prediction[0] == 1 else "Not Spam"


# --- UI for prediction ---
user_input = st.text_area("✉️ Enter your SMS message here:")

if st.button("Check Message"):
    if not user_input.strip():
        st.warning("⚠️ Please enter a valid message.")
    else:
        result = predict_spam(user_input)
        if result == "Spam":
            st.error("🚫 This message is classified as **SPAM**.")
        else:
            st.success("✅ This message is classified as **NOT SPAM (HAM)**.")

# Optional: Show dataset preview
with st.expander("📄 View sample dataset (CSV)"):
    if df is not None:
        st.dataframe(df.head())
    else:
        # The load error was already reported above; keep the app usable.
        st.info("Dataset preview is unavailable.")

st.markdown("---")
st.markdown("🔒 **Note**: This is a demo model and not intended for production use without proper testing.")