MLDeveloper committed on
Commit
4f6ca42
·
verified ·
1 Parent(s): dd57252

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -13
app.py CHANGED
@@ -1,24 +1,25 @@
1
  import streamlit as st
2
  import pandas as pd
3
- import joblib
4
  import re
5
  import string
 
 
 
6
 
7
  # Page config
8
  st.set_page_config(page_title="SMS Spam Detector", layout="centered")
9
  st.title("📩 SMS Spam Detection App")
10
  st.markdown("🔍 Enter a message below to check if it's **Spam** or **Not Spam (Ham)**")
11
 
12
- # --- Load CSV for reference or stats ---
13
- csv_path = "https://huggingface.co/spaces/MLDeveloper/Spam_SMS_Detection/resolve/main/spam.csv"
14
  try:
15
- df = pd.read_csv(csv_path)
 
 
16
  except Exception as e:
17
- st.error(f"Error loading dataset: {e}")
18
-
19
- # --- Load trained model & vectorizer ---
20
- model = joblib.load("model/spam_model.pkl") # ✅ your trained model
21
- vectorizer = joblib.load("model/tfidf_vectorizer.pkl") # ✅ your TF-IDF vectorizer
22
 
23
  # --- Text Cleaning Function ---
24
  def clean_text(text):
@@ -30,6 +31,18 @@ def clean_text(text):
30
  text = text.translate(str.maketrans('', '', string.punctuation))
31
  return text.strip()
32
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  # --- Prediction Function ---
34
  def predict_spam(message):
35
  cleaned = clean_text(message)
@@ -37,7 +50,7 @@ def predict_spam(message):
37
  prediction = model.predict(vector)
38
  return "Spam" if prediction[0] == 1 else "Not Spam"
39
 
40
- # --- UI for prediction ---
41
  user_input = st.text_area("✉️ Enter your SMS message here:")
42
 
43
  if st.button("Check Message"):
@@ -50,9 +63,9 @@ if st.button("Check Message"):
50
  else:
51
  st.success("✅ This message is classified as **NOT SPAM (HAM)**.")
52
 
53
- # Optional: Show dataset preview
54
  with st.expander("📄 View sample dataset (CSV)"):
55
- st.dataframe(df.head())
56
 
57
  st.markdown("---")
58
- st.markdown("🔒 **Note**: This is a demo model and not intended for production use without proper testing.")
 
1
  import streamlit as st
2
  import pandas as pd
 
3
  import re
4
  import string
5
+ from sklearn.feature_extraction.text import TfidfVectorizer
6
+ from sklearn.naive_bayes import MultinomialNB
7
+ from sklearn.model_selection import train_test_split
8
 
9
  # Page config
10
  st.set_page_config(page_title="SMS Spam Detector", layout="centered")
11
  st.title("📩 SMS Spam Detection App")
12
  st.markdown("🔍 Enter a message below to check if it's **Spam** or **Not Spam (Ham)**")
13
 
14
+ # --- Load dataset ---
15
+ csv_url = "https://huggingface.co/spaces/MLDeveloper/Spam_SMS_Detection/resolve/main/spam.csv"
16
  try:
17
+ df = pd.read_csv(csv_url, encoding='latin-1')[['v1', 'v2']]
18
+ df.columns = ['label', 'message']
19
+ df['label'] = df['label'].map({'ham': 0, 'spam': 1})
20
  except Exception as e:
21
+ st.error(f"Failed to load CSV: {e}")
22
+ st.stop()
 
 
 
23
 
24
  # --- Text Cleaning Function ---
25
  def clean_text(text):
 
31
  text = text.translate(str.maketrans('', '', string.punctuation))
32
  return text.strip()
33
 
34
+ df['cleaned'] = df['message'].apply(clean_text)
35
+
36
+ # --- Train model ---
37
+ X = df['cleaned']
38
+ y = df['label']
39
+
40
+ vectorizer = TfidfVectorizer()
41
+ X_vec = vectorizer.fit_transform(X)
42
+
43
+ model = MultinomialNB()
44
+ model.fit(X_vec, y)
45
+
46
  # --- Prediction Function ---
47
  def predict_spam(message):
48
  cleaned = clean_text(message)
 
50
  prediction = model.predict(vector)
51
  return "Spam" if prediction[0] == 1 else "Not Spam"
52
 
53
+ # --- UI ---
54
  user_input = st.text_area("✉️ Enter your SMS message here:")
55
 
56
  if st.button("Check Message"):
 
63
  else:
64
  st.success("✅ This message is classified as **NOT SPAM (HAM)**.")
65
 
66
+ # Optional: View CSV
67
  with st.expander("📄 View sample dataset (CSV)"):
68
+ st.dataframe(df[['label', 'message']].head())
69
 
70
  st.markdown("---")
71
+ st.markdown("🔒 **Note**: Model is trained in real-time from CSV and not saved for reuse. Ideal for demo purposes.**")