Spaces:
Sleeping
Sleeping
File size: 2,332 Bytes
8f460b5 1840ab8 308314b 4f6ca42 d481617 ef88b24 308314b 4f6ca42 7d00f2b ef88b24 4f6ca42 ef88b24 4f6ca42 308314b 4f6ca42 308314b 4f6ca42 308314b 483b677 308314b f4ba322 4f6ca42 ef88b24 4f6ca42 ef88b24 7d00f2b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
import streamlit as st
import pandas as pd
import re
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
# Page config
# The page is configured before any other Streamlit element is emitted.
st.set_page_config(layout="centered", page_title="SMS Spam Detector")

# Header and a one-line prompt for the user.
st.title("📩 SMS Spam Detection App")
st.markdown(
    "🔍 Enter a message below to check if it's **Spam** or **Not Spam (Ham)**"
)
# --- Load dataset ---
csv_url = "https://huggingface.co/spaces/MLDeveloper/Spam_SMS_Detection/resolve/main/spam.csv"
try:
    # Read only the two columns the app uses and rename them by name, so the
    # result does not depend on the column order inside the CSV.
    df = pd.read_csv(csv_url, encoding='latin-1', usecols=['v1', 'v2'])
    df = df.rename(columns={'v1': 'label', 'v2': 'message'})
    df['label'] = df['label'].map({'ham': 0, 'spam': 1})
    # Series.map yields NaN for any label outside the mapping. Drop those rows
    # (and rows with a missing message) so that text cleaning and model
    # fitting only ever see complete rows, then restore an integer label dtype.
    df = df.dropna(subset=['label', 'message']).astype({'label': int})
except Exception as e:
    # Top-level UI boundary: show the failure in the page and halt the script
    # instead of letting a traceback surface.
    st.error(f"Failed to load CSV: {e}")
    st.stop()
# --- Text Cleaning Function ---
def clean_text(text):
    """Normalise a raw SMS string before it is vectorised.

    Steps, in order: lowercase, remove URLs, remove ``@mentions`` and ``#``
    signs, remove punctuation, remove digit runs, then trim surrounding
    whitespace. Interior whitespace is not collapsed, so a removed token can
    leave two consecutive spaces behind.

    Args:
        text: The message to clean; must be a ``str``.

    Returns:
        The cleaned, lowercased string (possibly empty).
    """
    text = text.lower()
    text = re.sub(r"http\S+|www\S+|https\S+", '', text, flags=re.MULTILINE)
    # `@\w+` removes a whole mention, handle included. The earlier pattern
    # `\@w+` only matched "@" followed by literal "w" characters, so handles
    # leaked into the features. A bare "#" is dropped but the tag word stays.
    text = re.sub(r'@\w+|#', '', text)
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\d+', '', text)
    # "_" counts as a word character for the regex above, so this pass is what
    # removes underscores (the one ASCII punctuation mark still present).
    text = text.translate(str.maketrans('', '', string.punctuation))
    return text.strip()
# Normalised copy of every message; the TF-IDF features are built from it.
df['cleaned'] = df['message'].map(clean_text)

# --- Train model ---
# Features are the cleaned texts, targets are the numeric labels.
X, y = df['cleaned'], df['label']

# Learn the vocabulary and weights, and vectorise the corpus, in one pass.
vectorizer = TfidfVectorizer()
X_vec = vectorizer.fit_transform(X)

# fit() returns the estimator itself, so construction and fitting are chained.
model = MultinomialNB().fit(X_vec, y)
# --- Prediction Function ---
def predict_spam(message):
    """Classify a single message with the fitted vectorizer and model.

    The message is passed through ``clean_text`` and transformed with the
    module-level ``vectorizer`` before ``model`` predicts its class.

    Args:
        message: The raw message text.

    Returns:
        ``"Spam"`` when the predicted class is 1, otherwise ``"Not Spam"``.
    """
    features = vectorizer.transform([clean_text(message)])
    predicted_class = model.predict(features)[0]
    if predicted_class == 1:
        return "Spam"
    return "Not Spam"
# --- UI ---
user_input = st.text_area("✉️ Enter your SMS message here:")

# Classification runs only when the button is clicked.
if st.button("Check Message"):
    if not user_input.strip():
        # Blank or whitespace-only input: nothing to classify.
        st.warning("⚠️ Please enter a valid message.")
    elif predict_spam(user_input) == "Spam":
        st.error("🚫 This message is classified as **SPAM**.")
    else:
        st.success("✅ This message is classified as **NOT SPAM (HAM)**.")
# Optional: View CSV
with st.expander("📄 View sample dataset (CSV)"):
    # Show the first rows only; labels appear as 0 (ham) / 1 (spam) because
    # they were mapped to integers when the dataset was loaded.
    st.dataframe(df[['label', 'message']].head())

# Horizontal rule followed by a footer note. The note previously ended with a
# dangling "**" that had no opening marker and rendered as literal asterisks.
st.markdown("---")
st.markdown("🔒 **Note**: Model is trained in real-time from CSV and not saved for reuse. Ideal for demo purposes.")
|