Spaces:
Sleeping
Sleeping
File size: 2,306 Bytes
8f460b5 1840ab8 308314b 00b4891 4f6ca42 d481617 00b4891 308314b 00b4891 308314b 00b4891 4f6ca42 00b4891 308314b 00b4891 308314b 00b4891 308314b 00b4891 308314b 00b4891 308314b 00b4891 308314b 00b4891 483b677 308314b f4ba322 00b4891 ef88b24 00b4891 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
import streamlit as st
import pandas as pd
import re
import string
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
# Title & Intro
# Page configuration is issued first, ahead of every other Streamlit call in
# this script; the title and the one-line usage hint follow.
PAGE_TITLE = "SMS Spam Detection"
INTRO_TEXT = "🔍 Enter an SMS message below to check if it's **Spam** or **Not Spam (Ham)**"

st.set_page_config(page_title=PAGE_TITLE, layout="centered")
st.title("📩 SMS Spam Detection App")
st.markdown(INTRO_TEXT)
# --- Load CSV Dataset ---
@st.cache_data
def load_data():
    """Fetch the SMS spam CSV and return it as a two-column DataFrame.

    The file is read with latin-1 encoding. Only its ``v1`` and ``v2``
    columns are kept, and they are renamed to ``label`` and ``message``
    respectively.
    """
    source_url = "https://huggingface.co/datasets/MLDeveloper/spam_sms_dataset/resolve/main/spam.csv"
    raw = pd.read_csv(source_url, encoding='latin-1')
    # Select first, then rename, so any other columns in the file are dropped.
    return raw[['v1', 'v2']].rename(columns={'v1': 'label', 'v2': 'message'})
df = load_data()
# --- Preprocessing ---
# Encode the text labels as integers: ham -> 0, spam -> 1.
df['label'] = df['label'].map({'ham': 0, 'spam': 1})


# --- Train Model ---
@st.cache_resource
def _train_model(data):
    """Fit a TF-IDF vectorizer and a Multinomial Naive Bayes classifier.

    The fit is wrapped in ``st.cache_resource`` so the fitted objects are
    reused instead of being rebuilt each time this script is executed.

    Args:
        data: DataFrame with a text ``message`` column and a numeric
            ``label`` column.

    Returns:
        A ``(vectorizer, model)`` tuple, both already fitted.
    """
    # Same split parameters as before, so the model is fit on the same 80%
    # of the rows; the held-out 20% is not used anywhere in this script.
    train_messages, _, train_labels, _ = train_test_split(
        data['message'], data['label'], test_size=0.2, random_state=42
    )
    fitted_vectorizer = TfidfVectorizer()
    classifier = MultinomialNB()
    classifier.fit(fitted_vectorizer.fit_transform(train_messages), train_labels)
    return fitted_vectorizer, classifier


# NOTE(review): the model is fit on the raw messages, while predict_spam()
# passes its input through clean_text() first. Training and inference
# therefore see differently preprocessed text - confirm whether the training
# messages should be cleaned the same way.
vectorizer, model = _train_model(df)
# --- Clean Text Function ---
def clean_text(text):
    """Normalize a raw message for vectorization.

    Steps, in order: lowercase, drop URLs, drop @mentions and '#' signs,
    drop punctuation, drop digits, and trim surrounding whitespace.
    Interior whitespace is left untouched, so removed tokens may leave
    consecutive spaces behind.

    Args:
        text: The raw message string.

    Returns:
        The cleaned, lowercased string (possibly empty).
    """
    text = text.lower()
    # "http\S+" already covers "https...", so no separate alternative is needed.
    text = re.sub(r"http\S+|www\S+", '', text)
    # Remove whole @mentions and bare '#' characters. The previous pattern
    # (r'\@w+') only matched "@" followed by literal "w" characters, which
    # turned "@world" into "orld" instead of removing the mention.
    text = re.sub(r'@\w+|#', '', text)
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\d+', '', text)
    # \w keeps underscores, so this pass is what removes them.
    text = text.translate(str.maketrans('', '', string.punctuation))
    return text.strip()
# --- Predict Function ---
def predict_spam(text):
    """Classify a single message with the fitted vectorizer and model.

    The message is cleaned with ``clean_text`` and transformed by the
    module-level ``vectorizer`` before being passed to ``model.predict``.

    Args:
        text: The raw message string.

    Returns:
        ``"Spam"`` when the predicted label is 1, otherwise
        ``"Not Spam (Ham)"``.
    """
    features = vectorizer.transform([clean_text(text)])
    predicted_label = model.predict(features)[0]
    if predicted_label == 1:
        return "Spam"
    return "Not Spam (Ham)"
# --- Input ---
user_input = st.text_area("✉️ Enter your SMS message here:")

# Classify only when the button is pressed; whitespace-only input gets a
# warning instead of a prediction.
if st.button("Check Message"):
    if not user_input.strip():
        st.warning("⚠️ Please enter a message.")
    elif predict_spam(user_input) == "Spam":
        st.error("🚫 This message is classified as **SPAM**.")
    else:
        st.success("✅ This message is classified as **NOT SPAM (HAM)**.")
# --- Dataset preview ---
# Show the first rows of the labelled dataset inside a collapsible section.
sample_rows = df.head()
with st.expander("📄 View sample dataset"):
    st.dataframe(sample_rows)

# Footer: a horizontal rule followed by the disclaimer.
for footer_line in ("---", "🔒 *Note: This app is for educational purposes only.*"):
    st.markdown(footer_line)
|