File size: 2,306 Bytes
8f460b5
1840ab8
308314b
 
00b4891
4f6ca42
 
d481617
00b4891
 
308314b
00b4891
308314b
00b4891
 
 
 
 
 
4f6ca42
00b4891
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308314b
00b4891
308314b
 
00b4891
308314b
 
 
 
 
 
00b4891
 
 
308314b
 
00b4891
308314b
00b4891
308314b
 
 
 
00b4891
483b677
308314b
 
 
 
 
f4ba322
00b4891
 
 
ef88b24
 
00b4891
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import streamlit as st
import pandas as pd
import re
import string
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

# --- Page setup: browser tab title, layout, heading and intro text ---
# set_page_config is issued before any other Streamlit call in this script.
st.set_page_config(
    page_title="SMS Spam Detection",
    layout="centered",
)
st.title("📩 SMS Spam Detection App")
st.markdown(
    "🔍 Enter an SMS message below to check if it's **Spam** or **Not Spam (Ham)**"
)

# --- Load CSV Dataset ---
@st.cache_data
def load_data():
    """Download the SMS spam CSV and return it as a two-column DataFrame.

    The file is read from a fixed Hugging Face URL using latin-1 decoding.
    Only the ``v1`` and ``v2`` columns are kept; they are exposed under the
    names ``label`` and ``message`` respectively.
    """
    source = "https://huggingface.co/datasets/MLDeveloper/spam_sms_dataset/resolve/main/spam.csv"
    raw = pd.read_csv(source, encoding='latin-1')
    return raw[['v1', 'v2']].rename(columns={'v1': 'label', 'v2': 'message'})

df = load_data()

# --- Preprocessing ---
# Encode the string labels as integers: ham -> 0, spam -> 1.
df['label'] = df['label'].map({'ham': 0, 'spam': 1})

# --- Train Model ---
# Fixed random_state keeps the 80/20 split identical across reruns.
X_train, X_test, y_train, y_test = train_test_split(
    df['message'], df['label'], test_size=0.2, random_state=42
)


@st.cache_resource
def _fit_spam_classifier(_messages, _labels):
    """Fit the TF-IDF vectorizer and the Naive Bayes classifier once.

    Streamlit re-executes the whole script on every widget interaction, so
    fitting at module level would retrain the model on each button click.
    Caching the fitted objects as a resource makes the fit happen once and
    be reused by later reruns.

    The parameters carry a leading underscore so Streamlit does not hash
    them; the inputs come from a fixed dataset and a fixed-seed split.

    Args:
        _messages: Training message texts.
        _labels: Integer labels aligned with ``_messages`` (0 ham, 1 spam).

    Returns:
        A tuple ``(vectorizer, train_matrix, model)``: the fitted
        ``TfidfVectorizer``, the TF-IDF matrix of the training messages,
        and the fitted ``MultinomialNB``.
    """
    fitted_vectorizer = TfidfVectorizer()
    train_matrix = fitted_vectorizer.fit_transform(_messages)

    classifier = MultinomialNB()
    classifier.fit(train_matrix, _labels)
    return fitted_vectorizer, train_matrix, classifier


# NOTE(review): the model is fit on the raw messages, while predict_spam()
# passes user input through clean_text() first — confirm whether the training
# text should receive the same cleaning.
vectorizer, X_train_tfidf, model = _fit_spam_classifier(X_train, y_train)

# --- Clean Text Function ---
def clean_text(text):
    """Normalize a message string before it is vectorized.

    Steps, applied in order:

    1. Lowercase the text.
    2. Remove URL-like tokens (anything starting with "http" or "www" up to
       the next whitespace).
    3. Remove whole @mentions (the "@" together with the handle) and any
       "#" characters.
    4. Remove every remaining character that is neither a word character
       nor whitespace.
    5. Remove runs of digits.
    6. Remove ASCII punctuation, which also drops underscores.
    7. Strip leading and trailing whitespace.

    Interior whitespace is left as-is, so removed tokens may leave
    consecutive spaces behind.

    Args:
        text: The raw message string.

    Returns:
        The cleaned string; empty if nothing survives the cleaning.
    """
    text = text.lower()
    # "http\S+" already matches https links, so no separate https branch.
    text = re.sub(r"http\S+|www\S+", "", text)
    # Match the handle with \w+; the previous pattern "\@w+" only matched an
    # "@" followed by literal "w" characters, so handles were never removed.
    text = re.sub(r"@\w+|#", "", text)
    text = re.sub(r"[^\w\s]", "", text)
    text = re.sub(r"\d+", "", text)
    # \w counts "_" as a word character, so the pass above keeps underscores;
    # the punctuation table removes them.
    text = text.translate(str.maketrans("", "", string.punctuation))
    return text.strip()

# --- Predict Function ---
def predict_spam(text):
    """Classify a single message and return a human-readable verdict.

    The text is cleaned with ``clean_text``, turned into a feature vector by
    the module-level ``vectorizer``, and scored by the module-level ``model``.

    Args:
        text: The raw message string.

    Returns:
        ``"Spam"`` when the predicted label equals 1, otherwise
        ``"Not Spam (Ham)"``.
    """
    features = vectorizer.transform([clean_text(text)])
    predicted_label = model.predict(features)[0]
    if predicted_label == 1:
        return "Spam"
    return "Not Spam (Ham)"

# --- Message input and verdict ---
user_input = st.text_area("✉️ Enter your SMS message here:")

if st.button("Check Message"):
    # Whitespace-only input is treated the same as no input.
    if not user_input.strip():
        st.warning("⚠️ Please enter a message.")
    else:
        result = predict_spam(user_input)
        is_spam = result == "Spam"
        if is_spam:
            st.error("🚫 This message is classified as **SPAM**.")
        else:
            st.success("✅ This message is classified as **NOT SPAM (HAM)**.")

# --- Dataset preview ---
# Collapsible panel showing the first rows of the loaded dataset.
preview_rows = df.head()
with st.expander("📄 View sample dataset"):
    st.dataframe(preview_rows)

# --- Footer ---
st.markdown("---")
st.markdown("🔒 *Note: This app is for educational purposes only.*")