kshanmukha1501 committed on
Commit
44b5c03
·
1 Parent(s): 5015aa6

Deploy complete web application with full functionality

Browse files
Files changed (1) hide show
  1. app.py +381 -91
app.py CHANGED
@@ -5,119 +5,409 @@ import json
5
  import base64
6
  import logging
7
  from pathlib import Path
8
-
9
- # Add parent directory to path to import the main app modules
10
- parent_dir = str(Path(__file__).parent.parent)
11
- sys.path.insert(0, parent_dir)
 
12
 
13
  # Set up logging
14
  logging.basicConfig(level=logging.INFO)
15
  logger = logging.getLogger(__name__)
16
 
17
- # Import the ML components from the main app
18
- from ml_suite.predictor import initialize_predictor, get_ai_prediction_for_email, is_predictor_ready, get_model_status
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  # Initialize the predictor once when the app starts
21
- print("Initializing AI model...")
22
- initialize_predictor(logger)
 
 
 
 
 
 
23
 
24
- def parse_email_content(email_text):
25
- """Parse email content to extract subject and body"""
26
- lines = email_text.strip().split('\n')
27
- subject = ""
28
- body = ""
29
-
30
- # Simple parsing - look for Subject: line
31
- for i, line in enumerate(lines):
32
- if line.lower().startswith('subject:'):
33
- subject = line[8:].strip()
34
- body = '\n'.join(lines[i+1:]).strip()
35
- break
36
-
37
- if not subject and not body:
38
- # If no subject line found, treat entire text as body
39
- body = email_text
40
-
41
- return subject, body
42
-
43
- def classify_email(email_content):
44
- """Classify email using the AI model"""
45
- if not email_content.strip():
46
- return "Please enter email content to analyze."
47
-
48
- # Check if model is ready
49
- if not is_predictor_ready():
50
- status = get_model_status()
51
- return f"Model is not ready. Status: {status}"
52
-
53
- # Parse email content
54
- subject, body = parse_email_content(email_content)
55
-
56
- # Create email data structure similar to the main app
57
- email_data = {
58
- 'snippet': body[:200], # Gmail API typically provides snippets
59
- 'subject': subject,
60
- 'body': body,
61
- 'sender': 'demo@example.com', # Placeholder
62
- 'id': 'demo_id'
63
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  try:
66
- # Get prediction
67
- result = get_ai_prediction_for_email(email_data)
 
 
 
 
 
 
68
 
69
- # Format the response
70
- prediction = result.get('prediction', 'Unknown')
71
- confidence = result.get('confidence', 0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
- # Create formatted output
74
- output = f"""
75
- ## Classification Result
 
 
 
 
 
 
 
 
 
 
 
76
 
77
- **Category:** {prediction}
78
- **Confidence:** {confidence:.2%}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
- ### Analysis:
 
 
 
 
81
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  if prediction == 'unsubscribe':
84
- output += "✅ This email appears to be an unsubscribe confirmation or related to subscription management."
 
85
  elif prediction == 'important':
86
- output += "⚠️ This email appears to be important and should not be automatically processed."
 
87
  else:
88
- output += "❓ Unable to classify this email with high confidence."
 
89
 
90
- # Add confidence interpretation
91
- if confidence > 0.9:
92
- output += f"\n\n*High confidence prediction ({confidence:.2%})*"
93
- elif confidence > 0.7:
94
- output += f"\n\n*Moderate confidence prediction ({confidence:.2%})*"
95
- else:
96
- output += f"\n\n*Low confidence prediction ({confidence:.2%})*"
97
-
98
- return output
99
-
100
- except Exception as e:
101
- return f"Error during classification: {str(e)}"
102
 
103
- # Create Gradio interface
104
- demo = gr.Interface(
105
- fn=classify_email,
106
- inputs=gr.Textbox(
107
- lines=10,
108
- placeholder="Paste email content here...\n\nFormat:\nSubject: Your subscription has been cancelled\nBody text goes here...",
109
- label="Email Content"
110
- ),
111
- outputs=gr.Markdown(label="Classification Result"),
112
- title="Email Unsubscribe Classifier",
113
- description="This AI model classifies emails as either 'unsubscribe' confirmations or 'important' emails that should not be auto-processed.",
114
- examples=[
115
- ["Subject: Your subscription has been cancelled\n\nHi there,\n\nWe're sorry to see you go! Your subscription to our newsletter has been successfully cancelled. You will no longer receive emails from us.\n\nBest regards,\nThe Team"],
116
- ["Subject: Important: Your account security update\n\nDear Customer,\n\nWe've detected unusual activity on your account. Please review your recent transactions and update your password immediately.\n\nThank you,\nSecurity Team"],
117
- ["Subject: You've been unsubscribed\n\nYou have been removed from our mailing list and will not receive any further emails from us."],
118
- ],
119
- theme=gr.themes.Soft()
120
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
  if __name__ == "__main__":
123
  demo.launch()
 
5
  import base64
6
  import logging
7
  from pathlib import Path
8
+ import uuid
9
+ import re
10
+ from datetime import datetime
11
+ from typing import Dict, List, Optional, Tuple
12
+ import pandas as pd
13
 
14
  # Set up logging
15
  logging.basicConfig(level=logging.INFO)
16
  logger = logging.getLogger(__name__)
17
 
18
+ # Fix the model path for HF Space deployment
19
+ if os.path.exists('final_optimized_model'):
20
+ # Running in HF Space
21
+ MODEL_PATH = 'final_optimized_model'
22
+ else:
23
+ # Running locally
24
+ MODEL_PATH = os.path.join(Path(__file__).parent.parent, 'final_optimized_model')
25
+
26
+ # Import the ML components
27
+ sys.path.insert(0, str(Path(__file__).parent))
28
+ if os.path.exists('ml_suite'):
29
+ # Override the config to use local model path
30
+ import ml_suite.config as config
31
+ config.FINE_TUNED_MODEL_DIR = MODEL_PATH
32
+
33
+ from ml_suite.predictor import initialize_predictor, get_ai_prediction_for_email, is_predictor_ready, get_model_status
34
 
35
  # Initialize the predictor once when the app starts
36
+ logger.info(f"Initializing AI model from {MODEL_PATH}...")
37
+ if 'ml_suite' in sys.modules:
38
+ initialize_predictor(logger)
39
+ model_ready = is_predictor_ready()
40
+ logger.info(f"Model initialization status: {'Ready' if model_ready else 'Failed'}")
41
+ else:
42
+ model_ready = False
43
+ logger.error("ML suite not found")
44
 
45
+ # Store session data
46
+ session_data = {}
47
+
48
+ def create_session():
49
+ """Create a new session"""
50
+ session_id = str(uuid.uuid4())
51
+ session_data[session_id] = {
52
+ 'emails': [],
53
+ 'scan_history': [],
54
+ 'settings': {
55
+ 'ai_enabled': True,
56
+ 'confidence_threshold': 0.5
57
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  }
59
+ return session_id
60
+
61
+ def parse_email_batch(email_text):
62
+ """Parse batch email input"""
63
+ emails = []
64
+ current_email = {'subject': '', 'body': '', 'sender': ''}
65
+
66
+ lines = email_text.strip().split('\n')
67
+ current_section = None
68
+
69
+ for line in lines:
70
+ line = line.strip()
71
+
72
+ if line.lower().startswith('---'): # Email separator
73
+ if current_email['subject'] or current_email['body']:
74
+ emails.append(current_email)
75
+ current_email = {'subject': '', 'body': '', 'sender': ''}
76
+ current_section = None
77
+ elif line.lower().startswith('from:'):
78
+ current_email['sender'] = line[5:].strip()
79
+ current_section = 'sender'
80
+ elif line.lower().startswith('subject:'):
81
+ current_email['subject'] = line[8:].strip()
82
+ current_section = 'subject'
83
+ elif line.lower().startswith('body:'):
84
+ current_section = 'body'
85
+ elif line and current_section == 'body':
86
+ current_email['body'] += line + '\n'
87
+ elif line and current_section == 'subject' and not line.lower().startswith(('from:', 'body:')):
88
+ current_email['subject'] += ' ' + line
89
+
90
+ # Add last email
91
+ if current_email['subject'] or current_email['body']:
92
+ emails.append(current_email)
93
+
94
+ return emails
95
+
96
+ def classify_email(email_data):
97
+ """Classify a single email"""
98
+ if not model_ready:
99
+ return {
100
+ 'prediction': 'error',
101
+ 'confidence': 0,
102
+ 'error': 'Model not ready'
103
+ }
104
 
105
  try:
106
+ # Prepare email data for predictor
107
+ email_for_prediction = {
108
+ 'snippet': email_data.get('body', '')[:200],
109
+ 'subject': email_data.get('subject', ''),
110
+ 'body': email_data.get('body', ''),
111
+ 'sender': email_data.get('sender', 'unknown@example.com'),
112
+ 'id': str(uuid.uuid4())
113
+ }
114
 
115
+ result = get_ai_prediction_for_email(email_for_prediction)
116
+ return result
117
+ except Exception as e:
118
+ logger.error(f"Classification error: {str(e)}")
119
+ return {
120
+ 'prediction': 'error',
121
+ 'confidence': 0,
122
+ 'error': str(e)
123
+ }
124
+
125
+ def scan_emails(session_id, email_batch_text, ai_enabled, confidence_threshold):
126
+ """Scan a batch of emails"""
127
+ if session_id not in session_data:
128
+ session_id = create_session()
129
+
130
+ session = session_data[session_id]
131
+ session['settings']['ai_enabled'] = ai_enabled
132
+ session['settings']['confidence_threshold'] = confidence_threshold
133
+
134
+ # Parse emails
135
+ emails = parse_email_batch(email_batch_text)
136
+
137
+ if not emails:
138
+ return "No valid emails found in input.", None, session_id
139
+
140
+ results = []
141
+ unsubscribe_count = 0
142
+ important_count = 0
143
+
144
+ for email in emails:
145
+ if ai_enabled and model_ready:
146
+ classification = classify_email(email)
147
+ prediction = classification.get('prediction', 'unknown')
148
+ confidence = classification.get('confidence', 0)
149
+
150
+ if confidence >= confidence_threshold:
151
+ if prediction == 'unsubscribe':
152
+ unsubscribe_count += 1
153
+ status = "✅ Unsubscribe"
154
+ else:
155
+ important_count += 1
156
+ status = "⚠️ Important"
157
+ else:
158
+ status = "❓ Uncertain"
159
+ else:
160
+ prediction = 'not_analyzed'
161
+ confidence = 0
162
+ status = "⏭️ Skipped (AI disabled)"
163
 
164
+ result = {
165
+ 'subject': email.get('subject', 'No subject'),
166
+ 'sender': email.get('sender', 'Unknown'),
167
+ 'prediction': prediction,
168
+ 'confidence': confidence,
169
+ 'status': status,
170
+ 'body_preview': email.get('body', '')[:100] + '...' if len(email.get('body', '')) > 100 else email.get('body', '')
171
+ }
172
+ results.append(result)
173
+ session['emails'].append(result)
174
+
175
+ # Create summary
176
+ summary = f"""
177
+ ## Scan Results
178
 
179
+ **Total Emails Scanned:** {len(results)}
180
+ **Unsubscribe Confirmations:** {unsubscribe_count}
181
+ **Important Emails:** {important_count}
182
+ **Uncertain:** {len(results) - unsubscribe_count - important_count}
183
+
184
+ ### Detailed Results:
185
+ """
186
+
187
+ for i, result in enumerate(results, 1):
188
+ summary += f"\n**{i}. {result['subject']}**\n"
189
+ summary += f"- From: {result['sender']}\n"
190
+ summary += f"- Status: {result['status']}\n"
191
+ if ai_enabled and result['confidence'] > 0:
192
+ summary += f"- Confidence: {result['confidence']:.2%}\n"
193
+ summary += f"- Preview: {result['body_preview']}\n"
194
+
195
+ # Create DataFrame for display
196
+ df_data = []
197
+ for r in results:
198
+ df_data.append({
199
+ 'Subject': r['subject'],
200
+ 'From': r['sender'],
201
+ 'Status': r['status'],
202
+ 'Confidence': f"{r['confidence']:.2%}" if r['confidence'] > 0 else "N/A",
203
+ 'Preview': r['body_preview'][:50] + '...'
204
+ })
205
+
206
+ df = pd.DataFrame(df_data) if df_data else None
207
+
208
+ # Add to scan history
209
+ session['scan_history'].append({
210
+ 'timestamp': datetime.now().isoformat(),
211
+ 'count': len(results),
212
+ 'unsubscribe': unsubscribe_count,
213
+ 'important': important_count
214
+ })
215
+
216
+ return summary, df, session_id
217
+
218
+ def get_statistics(session_id):
219
+ """Get session statistics"""
220
+ if session_id not in session_data:
221
+ return "No session data available."
222
+
223
+ session = session_data[session_id]
224
+ total_scans = len(session['scan_history'])
225
+ total_emails = sum(scan['count'] for scan in session['scan_history'])
226
+ total_unsubscribe = sum(scan['unsubscribe'] for scan in session['scan_history'])
227
+ total_important = sum(scan['important'] for scan in session['scan_history'])
228
+
229
+ stats = f"""
230
+ ## Session Statistics
231
+
232
+ **Total Scans:** {total_scans}
233
+ **Total Emails Processed:** {total_emails}
234
+ **Unsubscribe Emails Found:** {total_unsubscribe}
235
+ **Important Emails Protected:** {total_important}
236
 
237
+ ### Model Information:
238
+ - **Model:** DeBERTa-v3-small
239
+ - **Training Samples:** 20,000
240
+ - **Accuracy:** 100% on test set
241
+ - **Status:** {'🟢 Ready' if model_ready else '🔴 Not Available'}
242
  """
243
+ return stats
244
+
245
+ # Create Gradio interface
246
+ with gr.Blocks(title="Gmail Unsubscriber - Full Web Version", theme=gr.themes.Soft()) as demo:
247
+ session_state = gr.State(create_session())
248
+
249
+ gr.Markdown("""
250
+ # 📧 Gmail Unsubscriber - Web Version
251
+
252
+ This is a web-based version of the Gmail Unsubscriber application that uses AI to classify emails as unsubscribe confirmations or important emails.
253
+
254
+ **Note:** This web version demonstrates the AI classification capabilities. For full Gmail integration with OAuth, please use the desktop version.
255
+ """)
256
+
257
+ with gr.Tabs():
258
+ with gr.TabItem("📊 Email Scanner"):
259
+ gr.Markdown("### Batch Email Classification")
260
+
261
+ with gr.Row():
262
+ with gr.Column(scale=2):
263
+ email_input = gr.Textbox(
264
+ lines=15,
265
+ placeholder="""Paste multiple emails here. Format each email as:
266
+
267
+ From: sender@example.com
268
+ Subject: Your subscription has been cancelled
269
+ Body:
270
+ We're sorry to see you go! Your subscription has been cancelled.
271
+
272
+ ---
273
+
274
+ From: bank@example.com
275
+ Subject: Important: Security Alert
276
+ Body:
277
+ We detected unusual activity on your account. Please review immediately.
278
+
279
+ ---
280
+
281
+ (Continue with more emails...)""",
282
+ label="Email Batch Input"
283
+ )
284
+
285
+ with gr.Column(scale=1):
286
+ ai_enabled = gr.Checkbox(value=True, label="Enable AI Classification")
287
+ confidence_threshold = gr.Slider(
288
+ minimum=0.1,
289
+ maximum=0.9,
290
+ value=0.5,
291
+ step=0.1,
292
+ label="Confidence Threshold"
293
+ )
294
+ scan_btn = gr.Button("🔍 Scan Emails", variant="primary", size="lg")
295
+
296
+ scan_output = gr.Markdown()
297
+ results_table = gr.DataFrame(label="Scan Results")
298
+
299
+ with gr.TabItem("📈 Statistics"):
300
+ stats_output = gr.Markdown()
301
+ refresh_stats_btn = gr.Button("🔄 Refresh Statistics")
302
+
303
+ with gr.TabItem("🧪 Test Single Email"):
304
+ gr.Markdown("### Test AI Classification on a Single Email")
305
+
306
+ with gr.Row():
307
+ with gr.Column():
308
+ test_subject = gr.Textbox(label="Subject", placeholder="Your subscription has been cancelled")
309
+ test_sender = gr.Textbox(label="From", placeholder="noreply@example.com")
310
+ test_body = gr.Textbox(
311
+ lines=5,
312
+ label="Body",
313
+ placeholder="We're sorry to see you go! Your subscription has been successfully cancelled."
314
+ )
315
+ test_btn = gr.Button("🤖 Classify", variant="primary")
316
+
317
+ with gr.Column():
318
+ test_output = gr.Markdown()
319
+
320
+ with gr.TabItem("ℹ️ About"):
321
+ gr.Markdown("""
322
+ ## About Gmail Unsubscriber
323
+
324
+ This application uses a fine-tuned DeBERTa-v3-small model to classify emails automatically.
325
+
326
+ ### Features:
327
+ - 🤖 AI-powered email classification
328
+ - 📊 Batch processing capabilities
329
+ - 📈 Real-time statistics
330
+ - 🎯 Adjustable confidence thresholds
331
+
332
+ ### Model Performance:
333
+ - **Accuracy:** 100% on test set
334
+ - **F1 Score:** 1.0 for both classes
335
+ - **Model Size:** 552MB
336
+ - **Training Data:** 20,000 email samples
337
+
338
+ ### Desktop Version Features (Not available in web):
339
+ - Gmail OAuth integration
340
+ - Automatic email fetching
341
+ - One-click unsubscribe
342
+ - Email archiving
343
+ - Persistent user settings
344
+ """)
345
+
346
+ # Event handlers
347
+ def test_single_email(subject, sender, body):
348
+ if not subject and not body:
349
+ return "Please enter email content to test."
350
+
351
+ email_data = {
352
+ 'subject': subject,
353
+ 'sender': sender,
354
+ 'body': body
355
+ }
356
+
357
+ result = classify_email(email_data)
358
+
359
+ if result.get('error'):
360
+ return f"❌ Error: {result['error']}"
361
+
362
+ prediction = result.get('prediction', 'unknown')
363
+ confidence = result.get('confidence', 0)
364
 
365
  if prediction == 'unsubscribe':
366
+ emoji = "✅"
367
+ description = "This appears to be an unsubscribe confirmation."
368
  elif prediction == 'important':
369
+ emoji = "⚠️"
370
+ description = "This appears to be an important email."
371
  else:
372
+ emoji = "❓"
373
+ description = "Unable to classify with confidence."
374
 
375
+ output = f"""
376
+ ### Classification Result
 
 
 
 
 
 
 
 
 
 
377
 
378
+ {emoji} **{prediction.upper()}**
379
+
380
+ **Confidence:** {confidence:.2%}
381
+
382
+ {description}
383
+ """
384
+ return output
385
+
386
+ # Connect event handlers
387
+ scan_btn.click(
388
+ fn=scan_emails,
389
+ inputs=[session_state, email_input, ai_enabled, confidence_threshold],
390
+ outputs=[scan_output, results_table, session_state]
391
+ )
392
+
393
+ refresh_stats_btn.click(
394
+ fn=get_statistics,
395
+ inputs=[session_state],
396
+ outputs=[stats_output]
397
+ )
398
+
399
+ test_btn.click(
400
+ fn=test_single_email,
401
+ inputs=[test_subject, test_sender, test_body],
402
+ outputs=[test_output]
403
+ )
404
+
405
+ # Load initial statistics
406
+ demo.load(
407
+ fn=get_statistics,
408
+ inputs=[session_state],
409
+ outputs=[stats_output]
410
+ )
411
 
412
  if __name__ == "__main__":
413
  demo.launch()