Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,24 +1,25 @@
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
-
import joblib
|
4 |
import re
|
5 |
import string
|
|
|
|
|
|
|
6 |
|
7 |
# Page config
|
8 |
st.set_page_config(page_title="SMS Spam Detector", layout="centered")
|
9 |
st.title("📩 SMS Spam Detection App")
|
10 |
st.markdown("🔍 Enter a message below to check if it's **Spam** or **Not Spam (Ham)**")
|
11 |
|
12 |
-
# --- Load
|
13 |
-
|
14 |
try:
|
15 |
-
df = pd.read_csv(
|
|
|
|
|
16 |
except Exception as e:
|
17 |
-
st.error(f"
|
18 |
-
|
19 |
-
# --- Load trained model & vectorizer ---
|
20 |
-
model = joblib.load("model/spam_model.pkl") # ✅ your trained model
|
21 |
-
vectorizer = joblib.load("model/tfidf_vectorizer.pkl") # ✅ your TF-IDF vectorizer
|
22 |
|
23 |
# --- Text Cleaning Function ---
|
24 |
def clean_text(text):
|
@@ -30,6 +31,18 @@ def clean_text(text):
|
|
30 |
text = text.translate(str.maketrans('', '', string.punctuation))
|
31 |
return text.strip()
|
32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
# --- Prediction Function ---
|
34 |
def predict_spam(message):
|
35 |
cleaned = clean_text(message)
|
@@ -37,7 +50,7 @@ def predict_spam(message):
|
|
37 |
prediction = model.predict(vector)
|
38 |
return "Spam" if prediction[0] == 1 else "Not Spam"
|
39 |
|
40 |
-
# --- UI
|
41 |
user_input = st.text_area("✉️ Enter your SMS message here:")
|
42 |
|
43 |
if st.button("Check Message"):
|
@@ -50,9 +63,9 @@ if st.button("Check Message"):
|
|
50 |
else:
|
51 |
st.success("✅ This message is classified as **NOT SPAM (HAM)**.")
|
52 |
|
53 |
-
# Optional:
|
54 |
with st.expander("📄 View sample dataset (CSV)"):
|
55 |
-
st.dataframe(df.head())
|
56 |
|
57 |
st.markdown("---")
|
58 |
-
st.markdown("🔒 **Note**:
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
|
|
3 |
import re
|
4 |
import string
|
5 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
6 |
+
from sklearn.naive_bayes import MultinomialNB
|
7 |
+
from sklearn.model_selection import train_test_split
|
8 |
|
9 |
# Page config
|
10 |
st.set_page_config(page_title="SMS Spam Detector", layout="centered")
|
11 |
st.title("📩 SMS Spam Detection App")
|
12 |
st.markdown("🔍 Enter a message below to check if it's **Spam** or **Not Spam (Ham)**")
|
13 |
|
14 |
+
# --- Load dataset ---
|
15 |
+
csv_url = "https://huggingface.co/spaces/MLDeveloper/Spam_SMS_Detection/resolve/main/spam.csv"
|
16 |
try:
|
17 |
+
df = pd.read_csv(csv_url, encoding='latin-1')[['v1', 'v2']]
|
18 |
+
df.columns = ['label', 'message']
|
19 |
+
df['label'] = df['label'].map({'ham': 0, 'spam': 1})
|
20 |
except Exception as e:
|
21 |
+
st.error(f"Failed to load CSV: {e}")
|
22 |
+
st.stop()
|
|
|
|
|
|
|
23 |
|
24 |
# --- Text Cleaning Function ---
|
25 |
def clean_text(text):
|
|
|
31 |
text = text.translate(str.maketrans('', '', string.punctuation))
|
32 |
return text.strip()
|
33 |
|
34 |
+
df['cleaned'] = df['message'].apply(clean_text)
|
35 |
+
|
36 |
+
# --- Train model ---
|
37 |
+
X = df['cleaned']
|
38 |
+
y = df['label']
|
39 |
+
|
40 |
+
vectorizer = TfidfVectorizer()
|
41 |
+
X_vec = vectorizer.fit_transform(X)
|
42 |
+
|
43 |
+
model = MultinomialNB()
|
44 |
+
model.fit(X_vec, y)
|
45 |
+
|
46 |
# --- Prediction Function ---
|
47 |
def predict_spam(message):
|
48 |
cleaned = clean_text(message)
|
|
|
50 |
prediction = model.predict(vector)
|
51 |
return "Spam" if prediction[0] == 1 else "Not Spam"
|
52 |
|
53 |
+
# --- UI ---
|
54 |
user_input = st.text_area("✉️ Enter your SMS message here:")
|
55 |
|
56 |
if st.button("Check Message"):
|
|
|
63 |
else:
|
64 |
st.success("✅ This message is classified as **NOT SPAM (HAM)**.")
|
65 |
|
66 |
+
# Optional: View CSV
|
67 |
with st.expander("📄 View sample dataset (CSV)"):
|
68 |
+
st.dataframe(df[['label', 'message']].head())
|
69 |
|
70 |
st.markdown("---")
|
71 |
+
st.markdown("🔒 **Note**: Model is trained in real-time from CSV and not saved for reuse. Ideal for demo purposes.")
|