Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import joblib | |
import re | |
import string | |
# --- Page configuration ---
# Page metadata is set first, before any other Streamlit element is emitted.
st.set_page_config(page_title="SMS Spam Detector", layout="centered")
st.title("📩 SMS Spam Detection App")
st.markdown("🔍 Enter a message below to check if it's **Spam** or **Not Spam (Ham)**")

# --- Load CSV for reference or stats ---
# The dataset is only used for the preview expander further down the script;
# predictions rely solely on the pickled model and vectorizer loaded below.
csv_path = "https://huggingface.co/spaces/MLDeveloper/Spam_SMS_Detection/resolve/main/spam.csv"
try:
    df = pd.read_csv(csv_path)
except Exception as e:
    # Report the failure in the UI, then bind an empty frame so that later
    # uses of `df` (e.g. `df.head()`) do not crash the script with a NameError.
    st.error(f"Error loading dataset: {e}")
    df = pd.DataFrame()

# --- Load trained model & vectorizer ---
# Both artifacts are read from paths relative to the working directory.
# NOTE(review): joblib artifacts are pickle-based; only load files you trust.
model = joblib.load("model/spam_model.pkl")  # trained classifier
vectorizer = joblib.load("model/tfidf_vectorizer.pkl")  # fitted TF-IDF vectorizer
# --- Text Cleaning Function ---
def clean_text(text):
    """Normalize a raw message before it is vectorized.

    The steps run in this order: lowercase, remove URLs, remove @mentions
    and '#' signs, remove remaining punctuation, remove digit runs, and
    finally strip leading/trailing whitespace.

    Args:
        text: The raw message string.

    Returns:
        The cleaned string. It may be empty, and it may contain consecutive
        spaces where tokens were removed (inner whitespace is not collapsed).
    """
    text = text.lower()
    # "http\S+" also covers "https...", so no separate alternative is needed.
    text = re.sub(r"http\S+|www\S+", '', text)
    # Remove whole @mentions and bare '#' signs. The previous pattern was
    # "\@w+", which only matched "@" followed by literal "w" characters, so
    # mentions such as "@john" were left in the text as "john".
    # NOTE(review): if the pickled vectorizer was fitted on text cleaned with
    # the old pattern, confirm predictions are unaffected or retrain.
    text = re.sub(r'@\w+|#', '', text)
    # Drop every character that is neither a word character nor whitespace.
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\d+', '', text)
    # "\w" includes "_", so underscores survive the step above; this pass
    # removes them (and any other ASCII punctuation, should any remain).
    text = text.translate(str.maketrans('', '', string.punctuation))
    return text.strip()
# --- Prediction Function ---
def predict_spam(message):
    """Classify a single message as "Spam" or "Not Spam".

    The message is cleaned with `clean_text`, transformed by the module-level
    `vectorizer`, and passed to the module-level `model`.

    Args:
        message: The raw message string.

    Returns:
        "Spam" when the first predicted label equals 1, otherwise "Not Spam".
    """
    # The vectorizer is given a one-element list, so one prediction comes back.
    features = vectorizer.transform([clean_text(message)])
    label = model.predict(features)[0]
    # NOTE(review): assumes the model encodes spam as the label 1 - confirm
    # against the training code.
    if label == 1:
        return "Spam"
    return "Not Spam"
# --- UI for prediction ---
user_input = st.text_area("✉️ Enter your SMS message here:")

if st.button("Check Message"):
    if not user_input.strip():
        # Blank or whitespace-only input: ask for a real message instead of
        # running the classifier.
        st.warning("⚠️ Please enter a valid message.")
    else:
        result = predict_spam(user_input)
        # Spam is shown as an error banner, everything else as a success banner.
        if result != "Spam":
            st.success("✅ This message is classified as **NOT SPAM (HAM)**.")
        else:
            st.error("🚫 This message is classified as **SPAM**.")

# Optional: collapsible preview of the first rows of the dataset loaded
# earlier in this script.
with st.expander("📄 View sample dataset (CSV)"):
    st.dataframe(df.head())

# Footer: horizontal rule followed by the demo disclaimer.
st.markdown("---")
st.markdown("🔒 **Note**: This is a demo model and not intended for production use without proper testing.")