MLDeveloper committed on
Commit
00b4891
·
verified ·
1 Parent(s): 52aca74

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -37
app.py CHANGED
@@ -2,60 +2,61 @@ import streamlit as st
2
  import pandas as pd
3
  import re
4
  import string
 
5
  from sklearn.feature_extraction.text import TfidfVectorizer
6
  from sklearn.naive_bayes import MultinomialNB
7
- from sklearn.model_selection import train_test_split
8
 
9
- # Page config
10
- st.set_page_config(page_title="SMS Spam Detector", layout="centered")
11
  st.title("📩 SMS Spam Detection App")
12
- st.markdown("🔍 Enter a message below to check if it's **Spam** or **Not Spam (Ham)**")
13
 
14
- # --- Load dataset ---
15
- csv_url = "https://huggingface.co/spaces/MLDeveloper/Spam_SMS_Detection/resolve/main/spam.csv"
16
- try:
17
- df = pd.read_csv(csv_url, encoding='latin-1')[['v1', 'v2']]
 
 
18
  df.columns = ['label', 'message']
19
- df['label'] = df['label'].map({'ham': 0, 'spam': 1})
20
- except Exception as e:
21
- st.error(f"Failed to load CSV: {e}")
22
- st.stop()
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- # --- Text Cleaning Function ---
25
  def clean_text(text):
26
  text = text.lower()
27
- text = re.sub(r"http\S+|www\S+|https\S+", '', text, flags=re.MULTILINE)
28
  text = re.sub(r'\@w+|\#','', text)
29
  text = re.sub(r'[^\w\s]', '', text)
30
  text = re.sub(r'\d+', '', text)
31
  text = text.translate(str.maketrans('', '', string.punctuation))
32
  return text.strip()
33
 
34
- df['cleaned'] = df['message'].apply(clean_text)
35
-
36
- # --- Train model ---
37
- X = df['cleaned']
38
- y = df['label']
39
-
40
- vectorizer = TfidfVectorizer()
41
- X_vec = vectorizer.fit_transform(X)
42
-
43
- model = MultinomialNB()
44
- model.fit(X_vec, y)
45
-
46
- # --- Prediction Function ---
47
- def predict_spam(message):
48
- cleaned = clean_text(message)
49
  vector = vectorizer.transform([cleaned])
50
  prediction = model.predict(vector)
51
- return "Spam" if prediction[0] == 1 else "Not Spam"
52
 
53
- # --- UI ---
54
  user_input = st.text_area("✉️ Enter your SMS message here:")
55
 
56
  if st.button("Check Message"):
57
  if user_input.strip() == "":
58
- st.warning("⚠️ Please enter a valid message.")
59
  else:
60
  result = predict_spam(user_input)
61
  if result == "Spam":
@@ -63,10 +64,9 @@ if st.button("Check Message"):
63
  else:
64
  st.success("✅ This message is classified as **NOT SPAM (HAM)**.")
65
 
66
- # Optional: View CSV
67
- with st.expander("📄 View sample dataset (CSV)"):
68
- st.dataframe(df[['label', 'message']].head())
69
 
70
  st.markdown("---")
71
- st.markdown("🔒 **Note**: Model is trained in real-time from CSV and not saved for reuse. Ideal for demo purposes.**")
72
-
 
2
  import pandas as pd
3
  import re
4
  import string
5
+ from sklearn.model_selection import train_test_split
6
  from sklearn.feature_extraction.text import TfidfVectorizer
7
  from sklearn.naive_bayes import MultinomialNB
 
8
 
9
+ # Title & Intro
10
+ st.set_page_config(page_title="SMS Spam Detection", layout="centered")
11
  st.title("📩 SMS Spam Detection App")
12
+ st.markdown("🔍 Enter an SMS message below to check if it's **Spam** or **Not Spam (Ham)**")
13
 
14
+ # --- Load CSV Dataset ---
15
+ @st.cache_data
16
+ def load_data():
17
+ url = "https://huggingface.co/datasets/MLDeveloper/spam_sms_dataset/resolve/main/spam.csv"
18
+ df = pd.read_csv(url, encoding='latin-1')
19
+ df = df[['v1', 'v2']]
20
  df.columns = ['label', 'message']
21
+ return df
22
+
23
+ df = load_data()
24
+
25
+ # --- Preprocessing ---
26
+ df['label'] = df['label'].map({'ham': 0, 'spam': 1})
27
+
28
+ # --- Train Model ---
29
+ X_train, X_test, y_train, y_test = train_test_split(df['message'], df['label'], test_size=0.2, random_state=42)
30
+
31
+ vectorizer = TfidfVectorizer()
32
+ X_train_tfidf = vectorizer.fit_transform(X_train)
33
+
34
+ model = MultinomialNB()
35
+ model.fit(X_train_tfidf, y_train)
36
 
37
+ # --- Clean Text Function ---
38
  def clean_text(text):
39
  text = text.lower()
40
+ text = re.sub(r"http\S+|www\S+|https\S+", '', text)
41
  text = re.sub(r'\@w+|\#','', text)
42
  text = re.sub(r'[^\w\s]', '', text)
43
  text = re.sub(r'\d+', '', text)
44
  text = text.translate(str.maketrans('', '', string.punctuation))
45
  return text.strip()
46
 
47
+ # --- Predict Function ---
48
+ def predict_spam(text):
49
+ cleaned = clean_text(text)
 
 
 
 
 
 
 
 
 
 
 
 
50
  vector = vectorizer.transform([cleaned])
51
  prediction = model.predict(vector)
52
+ return "Spam" if prediction[0] == 1 else "Not Spam (Ham)"
53
 
54
+ # --- Input ---
55
  user_input = st.text_area("✉️ Enter your SMS message here:")
56
 
57
  if st.button("Check Message"):
58
  if user_input.strip() == "":
59
+ st.warning("⚠️ Please enter a message.")
60
  else:
61
  result = predict_spam(user_input)
62
  if result == "Spam":
 
64
  else:
65
  st.success("✅ This message is classified as **NOT SPAM (HAM)**.")
66
 
67
+ # --- Dataset preview ---
68
+ with st.expander("📄 View sample dataset"):
69
+ st.dataframe(df.head())
70
 
71
  st.markdown("---")
72
+ st.markdown("🔒 *Note: This app is for educational purposes only.*")