SamanthaStorm committed on
Commit
456b288
·
verified ·
1 Parent(s): 9d2e492

Upload app (20).py

Browse files
Files changed (1)
  1. app (20).py +686 -0
app (20).py ADDED
@@ -0,0 +1,686 @@
+ import gradio as gr
+ import spaces
+ import torch
+ import numpy as np
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
+ from motif_tagging import detect_motifs
+ import re
+ import matplotlib.pyplot as plt
+ import io
+ from PIL import Image
+ from torch.nn.functional import sigmoid
+
+
+ # ─── Abuse Model ─────────────────────────────────────────────────
+ model_name = "SamanthaStorm/tether-multilabel-v3"
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
+ tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
+
+ LABELS = [
+     "recovery", "control", "gaslighting", "guilt tripping", "dismissiveness", "blame shifting",
+     "nonabusive", "projection", "insults", "contradictory statements", "obscure language"
+ ]
+
+ THRESHOLDS = {
+     "recovery": 0.4,
+     "control": 0.45,
+     "gaslighting": 0.25,
+     "guilt tripping": 0.20,
+     "dismissiveness": 0.25,
+     "blame shifting": 0.25,
+     "projection": 0.25,
+     "insults": 0.05,
+     "contradictory statements": 0.25,
+     "obscure language": 0.15,
+     "nonabusive": 1.0
+ }
+
+ PATTERN_WEIGHTS = {
+     "recovery": 0.7,
+     "control": 1.4,
+     "gaslighting": 1.5,
+     "guilt tripping": 1.2,
+     "dismissiveness": 0.9,
+     "blame shifting": 0.8,
+     "projection": 0.5,
+     "insults": 1.4,
+     "contradictory statements": 1.0,
+     "obscure language": 0.9,
+     "nonabusive": 0.0
+ }
+
+ ESCALATION_QUESTIONS = [
+     ("Partner has access to firearms or weapons", 4),
+     ("Partner threatened to kill you", 3),
+     ("Partner threatened you with a weapon", 3),
+     ("Partner has ever choked you, even if you considered it consensual at the time", 4),
+     ("Partner injured or threatened your pet(s)", 3),
+     ("Partner has broken your things, punched or kicked walls, or thrown things", 2),
+     ("Partner forced or coerced you into unwanted sexual acts", 3),
+     ("Partner threatened to take away your children", 2),
+     ("Violence has increased in frequency or severity", 3),
+     ("Partner monitors your calls/GPS/social media", 2)
+ ]
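+ # Note: the checklist weights above sum to 29, which is the denominator shown
+ # in the escalation output below ("{hybrid_score}/29").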
+
+ # ─── Escalation Risk Mapping ────────────────────────────────────
+ ESCALATION_RISKS = {
+     "blame shifting": "low",
+     "contradictory statements": "moderate",
+     "control": "high",
+     "dismissiveness": "moderate",
+     "gaslighting": "moderate",
+     "guilt tripping": "moderate",
+     "insults": "moderate",
+     "obscure language": "low",
+     "projection": "low",
+     "recovery": "low"
+ }
+
+ # ─── Risk Stage Labels ─────────────────────────────────────────
+ RISK_STAGE_LABELS = {
+     1: "🌀 Risk Stage: Tension-Building\n"
+        "This message reflects rising emotional pressure or subtle control attempts.",
+     2: "🔥 Risk Stage: Escalation\n"
+        "This message includes direct or aggressive patterns, suggesting active harm.",
+     3: "🌧️ Risk Stage: Reconciliation\n"
+        "This message reflects a reset attempt: apologies or emotional repair without accountability.",
+     4: "🌸 Risk Stage: Calm / Honeymoon\n"
+        "This message appears supportive but may follow prior harm, minimizing it."
+ }
+
+ # ─── Immediate Threat Motifs ───────────────────────────────────
+ THREAT_MOTIFS = [
+     "i'll kill you", "i'm going to hurt you", "you're dead", "you won't survive this",
+     "i'll break your face", "i'll bash your head in", "i'll snap your neck",
+     "i'll come over there and make you shut up", "i'll knock your teeth out",
+     "you're going to bleed", "you want me to hit you?", "i won't hold back next time",
+     "i swear to god i'll beat you", "next time, i won't miss", "i'll make you scream",
+     "i know where you live", "i'm outside", "i'll be waiting", "i saw you with him",
+     "you can't hide from me", "i'm coming to get you", "i'll find you", "i know your schedule",
+     "i watched you leave", "i followed you home", "you'll regret this", "you'll be sorry",
+     "you're going to wish you hadn't", "you brought this on yourself", "don't push me",
+     "you have no idea what i'm capable of", "you better watch yourself",
+     "i don't care what happens to you anymore", "i'll make you suffer", "you'll pay for this",
+     "i'll never let you go", "you're nothing without me", "if you leave me, i'll kill myself",
+     "i'll ruin you", "i'll tell everyone what you did", "i'll make sure everyone knows",
+     "i'm going to destroy your name", "you'll lose everyone", "i'll expose you",
+     "your friends will hate you", "i'll post everything", "you'll be cancelled",
+     "you'll lose everything", "i'll take the house", "i'll drain your account",
+     "you'll never see a dime", "you'll be broke when i'm done", "i'll make sure you lose your job",
+     "i'll take your kids", "i'll make sure you have nothing", "you can't afford to leave me",
+     "don't make me do this", "you know what happens when i'm mad", "you're forcing my hand",
+     "if you just behaved, this wouldn't happen", "this is your fault",
+     "you're making me hurt you", "i warned you", "you should have listened"
+ ]
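+ # Motif matching is substring-based after normalization (see detect_threat_motifs
+ # in analyze_composite), so punctuation and apostrophe style do not affect hits.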
+
+
+ # ─── Tone & Sentiment Models ─────────────────────────────────────
+ tone_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tone-tag-multilabel-v1", use_fast=False)
+ tone_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tone-tag-multilabel-v1")
+ TONE_LABELS = [
+     "cold invalidation", "coercive warmth", "contradictory gaslight",
+     "deflective hostility", "emotional instability", "nonabusive",
+     "performative regret", "emotional threat", "forced accountability flip"
+ ]
+
+ sentiment_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-sentiment", use_fast=False)
+ sentiment_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-sentiment")
+ SENTIMENT_LABELS = ["undermining", "supportive"]
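+ # The tone head is multilabel (scored with sigmoid and reduced via argmax below),
+ # while sentiment is treated as a two-class softmax over SENTIMENT_LABELS.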
+
+
+ # ─── DARVO Model ──────────────────────────────────────────────────
+ darvo_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-darvo-regressor-v1")
+ darvo_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-darvo-regressor-v1", use_fast=False)
+ darvo_model.eval()
+
+ def predict_darvo_score(text):
+     inputs = darvo_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+     with torch.no_grad():
+         logits = darvo_model(**inputs).logits
+     return round(sigmoid(logits).item(), 4)
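+ # Illustrative usage (input text invented for the example): the regressor emits
+ # a single logit, so predict_darvo_score("I never did that; you're attacking me")
+ # returns a float in [0.0, 1.0]; higher values suggest stronger DARVO framing.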
+
+ def detect_weapon_language(text):
+     weapon_keywords = ["knife", "gun", "bomb", "weapon", "kill", "stab"]
+     t = text.lower()
+     return any(w in t for w in weapon_keywords)
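+ # e.g. detect_weapon_language("put the knife down") -> True (simple keyword check;
+ # note this helper is not currently wired into the analysis below).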
+
+ # ─── Risk Stage Logic ─────────────────────────────────────────────
+ def get_risk_stage(patterns, sentiment):
+     if "insults" in patterns:
+         return 2
+     elif "recovery" in patterns:
+         return 3
+     elif "control" in patterns or "guilt tripping" in patterns:
+         return 1
+     elif sentiment == "supportive" and any(p in patterns for p in ["projection", "dismissiveness"]):
+         return 4
+     return 1
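+ # Illustrative: get_risk_stage(["insults", "control"], "undermining") -> 2,
+ # since the "insults" branch is checked before the tension-building branch.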
+
+
+ # ─── Emotional Tone Tag ──────────────────────────────────────────
+ def get_emotional_tone_tag(text, emotions, sentiment, patterns, abuse_score):
+     # Note: emotions, sentiment, patterns, and abuse_score are currently unused;
+     # the tag is the argmax of the multilabel tone head.
+     inputs = tone_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+     with torch.no_grad():
+         logits = tone_model(**inputs).logits[0]
+     probs = torch.sigmoid(logits).cpu().numpy()
+     scores = dict(zip(TONE_LABELS, np.round(probs, 3)))
+     return max(scores, key=scores.get)
+
+
+ @spaces.GPU
+ def compute_abuse_score(matched_scores, sentiment):
+     """
+     Compute an abuse score from matched patterns and sentiment.
+     """
+     if not matched_scores:
+         return 0.0
+
+     # Calculate total weight
+     total_weight = sum(weight for _, _, weight in matched_scores)
+     if total_weight == 0:
+         return 0.0
+
+     # Base score: weighted average of pattern scores, scaled to 0-100
+     weighted_sum = sum(score * weight for _, score, weight in matched_scores)
+     base_score = (weighted_sum / total_weight) * 100
+
+     # Pattern combination multipliers
+     if len(matched_scores) >= 3:  # multiple patterns detected
+         base_score *= 1.2  # 20% increase for pattern combinations
+
+     # High severity patterns
+     high_severity_patterns = {'gaslighting', 'control', 'blame shifting'}
+     if any(label in high_severity_patterns for label, _, _ in matched_scores):
+         base_score *= 1.15  # 15% increase for high severity patterns
+
+     # Pattern strength boosters
+     if any(score > 0.6 for _, score, _ in matched_scores):  # any pattern > 60%
+         base_score *= 1.1  # 10% increase for strong patterns
+
+     # Multiple high scores
+     high_scores = len([score for _, score, _ in matched_scores if score > 0.5])
+     if high_scores >= 2:
+         base_score *= 1.15  # 15% increase for multiple high scores
+
+     # Apply sentiment modifier
+     if sentiment == "supportive":
+         # Smaller reduction when high severity patterns are present
+         if any(label in high_severity_patterns for label, _, _ in matched_scores):
+             base_score *= 0.9  # only 10% reduction
+         else:
+             base_score *= 0.85  # normal 15% reduction
+     elif sentiment == "undermining":
+         base_score *= 1.15  # 15% increase for undermining sentiment
+
+     # Ensure minimum score for strong patterns
+     if any(score > 0.6 for _, score, _ in matched_scores):
+         base_score = max(base_score, 65.0)
+
+     # Cap maximum score
+     return min(round(base_score, 1), 100.0)
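+ # Illustrative call, with invented (label, raw_score, weight) triples:
+ # compute_abuse_score([("control", 0.72, 1.4), ("insults", 0.55, 1.4)], "undermining")
+ # Note: analyze_single_message below currently computes its own weighted average
+ # inline rather than calling this helper.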
+
+
+ def analyze_single_message(text, thresholds):
+     print("\n=== DEBUG START ===")
+     print(f"Input text: {text}")
+
+     if not text.strip():
+         print("Empty text, returning zeros")
+         return 0.0, [], [], {"label": "none"}, 1, 0.0, None
+
+     # Check for explicit abuse
+     explicit_abuse_words = ['fuck', 'bitch', 'shit', 'ass', 'dick']
+     explicit_abuse = any(word in text.lower() for word in explicit_abuse_words)
+     print(f"Explicit abuse detected: {explicit_abuse}")
+
+     # Abuse model inference
+     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+     with torch.no_grad():
+         outputs = model(**inputs)
+         raw_scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
+
+     # Print raw model outputs
+     print("\nRaw model scores:")
+     for label, score in zip(LABELS, raw_scores):
+         print(f"{label}: {score:.3f}")
+
+     # Get predictions and sort them
+     predictions = list(zip(LABELS, raw_scores))
+     sorted_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)
+     print("\nTop 3 predictions:")
+     for label, score in sorted_predictions[:3]:
+         print(f"{label}: {score:.3f}")
+
+     # Apply thresholds
+     threshold_labels = []
+     if explicit_abuse:
+         threshold_labels.append("insults")
+         print("\nForced inclusion of 'insults' due to explicit abuse")
+
+     for label, score in sorted_predictions:
+         base_threshold = thresholds.get(label, 0.25)
+         if explicit_abuse:
+             base_threshold *= 0.5
+         if score > base_threshold and label not in threshold_labels:  # avoid duplicates
+             threshold_labels.append(label)
+
+     print("\nLabels that passed thresholds:", threshold_labels)
+
+     # Calculate matched scores
+     matched_scores = []
+     for label in threshold_labels:
+         score = raw_scores[LABELS.index(label)]
+         weight = PATTERN_WEIGHTS.get(label, 1.0)
+         if explicit_abuse and label == "insults":
+             weight *= 1.5
+         matched_scores.append((label, score, weight))
+
+     print("\nMatched scores (label, score, weight):", matched_scores)
+
+     # Calculate abuse score
+     if not matched_scores:
+         print("No matched scores, returning 0")
+         return 0.0, [], [], {"label": "undermining"}, 2 if explicit_abuse else 1, 0.0, None
+
+     weighted_sum = sum(score * weight for _, score, weight in matched_scores)
+     total_weight = sum(weight for _, _, weight in matched_scores)
+     abuse_score = (weighted_sum / total_weight) * 100
+
+     if explicit_abuse:
+         abuse_score = max(abuse_score, 70.0)
+
+     print(f"\nCalculated abuse score: {abuse_score}")
+
+     # Get sentiment (two-class softmax)
+     sent_inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+     with torch.no_grad():
+         sent_logits = sentiment_model(**sent_inputs).logits[0]
+         sent_probs = torch.softmax(sent_logits, dim=-1).cpu().numpy()
+     sentiment = SENTIMENT_LABELS[int(np.argmax(sent_probs))]
+     print(f"\nDetected sentiment: {sentiment}")
+
+     # Get tone (multilabel sigmoid, argmax as the single tag)
+     tone_inputs = tone_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+     with torch.no_grad():
+         tone_logits = tone_model(**tone_inputs).logits[0]
+         tone_probs = torch.sigmoid(tone_logits).cpu().numpy()
+     tone_tag = TONE_LABELS[int(np.argmax(tone_probs))]
+     print(f"Detected tone: {tone_tag}")
+
+     # Get DARVO score
+     darvo_score = predict_darvo_score(text)
+     print(f"DARVO score: {darvo_score}")
+
+     # Set stage
+     stage = 2 if explicit_abuse or abuse_score > 70 else 1
+     print(f"Final stage: {stage}")
+
+     print("=== DEBUG END ===\n")
+
+     return abuse_score, threshold_labels, matched_scores, {"label": sentiment}, stage, darvo_score, tone_tag
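+ # Illustrative (input invented): analyze_single_message("you never listen", THRESHOLDS.copy())
+ # returns (abuse_score, threshold_labels, matched_scores, {"label": sentiment},
+ # stage, darvo_score, tone_tag); exact values depend on the model weights.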
+
+
+ def generate_risk_snippet(abuse_score, top_label, hybrid_score, stage):
+     """
+     Generate a risk assessment snippet based on abuse score and other factors.
+     """
+     risk_level = (
+         "Critical" if abuse_score >= 85 or hybrid_score >= 20 else
+         "High" if abuse_score >= 70 or hybrid_score >= 15 else
+         "Moderate" if abuse_score >= 50 or hybrid_score >= 10 else
+         "Low"
+     )
+
+     risk_descriptions = {
+         "Critical": (
+             "🚨 **Risk Level: Critical**\n"
+             "Multiple severe abuse patterns detected. This situation shows signs of "
+             "dangerous escalation, and immediate intervention may be needed."
+         ),
+         "High": (
+             "⚠️ **Risk Level: High**\n"
+             "Strong abuse patterns detected. This situation shows concerning "
+             "signs of manipulation and control."
+         ),
+         "Moderate": (
+             "⚡ **Risk Level: Moderate**\n"
+             "Concerning patterns detected. While not severe, these behaviors "
+             "indicate unhealthy relationship dynamics."
+         ),
+         "Low": (
+             "📝 **Risk Level: Low**\n"
+             "Minor concerning patterns detected. While present, the detected "
+             "behaviors are subtle or infrequent."
+         )
+     }
+
+     # Add stage-specific context
+     stage_context = {
+         1: "Current patterns suggest a tension-building phase.",
+         2: "Messages show signs of active escalation.",
+         3: "Patterns indicate attempted reconciliation without real change.",
+         4: "Surface calm may mask underlying issues."
+     }
+
+     snippet = risk_descriptions[risk_level]
+     if stage in stage_context:
+         snippet += f"\n{stage_context[stage]}"
+
+     return snippet
+
+
+ def generate_abuse_score_chart(dates, scores, patterns):
+     """
+     Generate a timeline chart of abuse scores.
+     """
+     # Create a single figure (closed at the end to prevent memory leaks)
+     fig, ax = plt.subplots(figsize=(10, 6))
+
+     # Plot points and lines
+     x = range(len(scores))
+     plt.plot(x, scores, 'bo-', linewidth=2, markersize=8)
+
+     # Add labels for each point
+     for i, (score, pattern) in enumerate(zip(scores, patterns)):
+         plt.annotate(
+             f'{pattern}\n{score:.0f}%',
+             (i, score),
+             textcoords="offset points",
+             xytext=(0, 10),
+             ha='center',
+             bbox=dict(
+                 boxstyle='round,pad=0.5',
+                 fc='white',
+                 ec='gray',
+                 alpha=0.8
+             )
+         )
+
+     # Customize the plot
+     plt.ylim(-5, 105)
+     plt.grid(True, linestyle='--', alpha=0.7)
+     plt.title('Abuse Pattern Timeline', pad=20, fontsize=12)
+     plt.ylabel('Abuse Score %')
+
+     # X-axis labels
+     plt.xticks(x, dates, rotation=45)
+
+     # Risk level bands
+     plt.axhspan(0, 50, color='#90EE90', alpha=0.2)    # light green
+     plt.axhspan(50, 70, color='#FFD700', alpha=0.2)   # gold
+     plt.axhspan(70, 85, color='#FFA500', alpha=0.2)   # orange
+     plt.axhspan(85, 100, color='#FF6B6B', alpha=0.2)  # light red
+
+     # Add risk level labels
+     plt.text(-0.2, 25, 'Low Risk', rotation=90, va='center')
+     plt.text(-0.2, 60, 'Moderate Risk', rotation=90, va='center')
+     plt.text(-0.2, 77.5, 'High Risk', rotation=90, va='center')
+     plt.text(-0.2, 92.5, 'Critical Risk', rotation=90, va='center')
+
+     # Adjust layout
+     plt.tight_layout()
+
+     # Convert plot to image
+     buf = io.BytesIO()
+     plt.savefig(buf, format='png', bbox_inches='tight')
+     buf.seek(0)
+     plt.close('all')  # close all figures to prevent memory leaks
+
+     return Image.open(buf)
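+ # Illustrative (invented sample data): the returned PIL.Image feeds gr.Image(type="pil"):
+ # generate_abuse_score_chart(["Message 1", "Message 2"], [42.0, 71.5], ["control", "insults"])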
+
+
+ def analyze_composite(msg1, msg2, msg3, *answers_and_none):
+     none_selected_checked = answers_and_none[-1]
+     responses_checked = any(answers_and_none[:-1])
+     none_selected = not responses_checked and none_selected_checked
+
+     if none_selected:
+         escalation_score = 0
+         escalation_note = "Checklist completed: no danger items reported."
+         escalation_completed = True
+     elif responses_checked:
+         escalation_score = sum(w for (_, w), a in zip(ESCALATION_QUESTIONS, answers_and_none[:-1]) if a)
+         escalation_note = "Checklist completed."
+         escalation_completed = True
+     else:
+         escalation_score = None
+         escalation_note = "Checklist not completed."
+         escalation_completed = False
+
+     messages = [msg1, msg2, msg3]
+     active = [(m, f"Message {i+1}") for i, m in enumerate(messages) if m.strip()]
+     if not active:
+         return "Please enter at least one message.", None
+
+     # Flag any threat phrases present in the messages
+     def normalize(text):
+         import unicodedata
+         text = text.lower().strip()
+         text = unicodedata.normalize("NFKD", text)
+         text = text.replace("\u2019", "'")  # curly apostrophe to straight
+         return re.sub(r"[^a-z0-9 ]", "", text)
+
+     def detect_threat_motifs(message, motif_list):
+         norm_msg = normalize(message)
+         return [motif for motif in motif_list if normalize(motif) in norm_msg]
+
+     # Collect matches per message
+     immediate_threats = [detect_threat_motifs(m, THREAT_MOTIFS) for m, _ in active]
+     flat_threats = [t for sublist in immediate_threats for t in sublist]
+     threat_risk = "Yes" if flat_threats else "No"
+     results = [(analyze_single_message(m, THRESHOLDS.copy()), d) for m, d in active]
+
+     abuse_scores = [r[0][0] for r in results]
+     stages = [r[0][4] for r in results]
+     darvo_scores = [r[0][5] for r in results]
+     tone_tags = [r[0][6] for r in results]
+     dates_used = [r[1] for r in results]
+
+     predicted_labels = [label for r in results for label in r[0][1]]  # threshold_labels per message
+     high = {'control'}
+     moderate = {'gaslighting', 'dismissiveness', 'obscure language', 'insults', 'contradictory statements', 'guilt tripping'}
+     low = {'blame shifting', 'projection', 'recovery'}
+     counts = {'high': 0, 'moderate': 0, 'low': 0}
+     for label in predicted_labels:
+         if label in high:
+             counts['high'] += 1
+         elif label in moderate:
+             counts['moderate'] += 1
+         elif label in low:
+             counts['low'] += 1
+
+     # Pattern escalation logic
+     pattern_escalation_risk = "Low"
+     if counts['high'] >= 2 and counts['moderate'] >= 2:
+         pattern_escalation_risk = "Critical"
+     elif (counts['high'] >= 2 and counts['moderate'] >= 1) or (counts['moderate'] >= 3) or (counts['high'] >= 1 and counts['moderate'] >= 2):
+         pattern_escalation_risk = "High"
+     elif (counts['moderate'] == 2) or (counts['high'] == 1 and counts['moderate'] == 1) or (counts['moderate'] == 1 and counts['low'] >= 2) or (counts['high'] == 1 and sum(counts.values()) == 1):
+         pattern_escalation_risk = "Moderate"
+
+     checklist_escalation_risk = "Unknown" if escalation_score is None else (
+         "Critical" if escalation_score >= 20 else
+         "Moderate" if escalation_score >= 10 else
+         "Low"
+     )
+
+     escalation_bump = 0
+     for result, _ in results:
+         abuse_score, _, _, sentiment, stage, darvo_score, tone_tag = result
+         if darvo_score > 0.65:
+             escalation_bump += 3
+         if tone_tag in ["forced accountability flip", "emotional threat"]:
+             escalation_bump += 2
+         if abuse_score > 80:
+             escalation_bump += 2
+         if stage == 2:
+             escalation_bump += 3
+
+     def rank(label):
+         return {"Low": 0, "Moderate": 1, "High": 2, "Critical": 3, "Unknown": 0}.get(label, 0)
+
+     combined_score = rank(pattern_escalation_risk) + rank(checklist_escalation_risk) + escalation_bump
+     escalation_risk = (
+         "Critical" if combined_score >= 6 else
+         "High" if combined_score >= 4 else
+         "Moderate" if combined_score >= 2 else
+         "Low"
+     )
+
+     # Build escalation_text and hybrid_score
+     if escalation_score is None:
+         escalation_text = (
+             "🚫 **Escalation Potential: Unknown** (Checklist not completed)\n"
+             "⚠️ This section was not completed. Escalation potential is estimated using message data only.\n"
+         )
+         hybrid_score = 0
+     elif escalation_score == 0:
+         escalation_text = (
+             "✅ **Escalation Checklist Completed:** No danger items reported.\n"
+             "🧭 **Escalation potential estimated from detected message patterns only.**\n"
+             f"• Pattern Risk: {pattern_escalation_risk}\n"
+             f"• Checklist Risk: None reported\n"
+             f"• Escalation Bump: +{escalation_bump} (from DARVO, tone, intensity, etc.)"
+         )
+         hybrid_score = escalation_bump
+     else:
+         hybrid_score = escalation_score + escalation_bump
+         escalation_text = (
+             f"📈 **Escalation Potential: {escalation_risk} ({hybrid_score}/29)**\n"
+             "📋 This score combines your safety checklist answers *and* detected high-risk behavior.\n"
+             f"• Pattern Risk: {pattern_escalation_risk}\n"
+             f"• Checklist Risk: {checklist_escalation_risk}\n"
+             f"• Escalation Bump: +{escalation_bump} (from DARVO, tone, intensity, etc.)"
+         )
+
+     # Composite abuse score: average of the per-message scores
+     composite_abuse_scores = [result[0] for result, _ in results]
+     composite_abuse = int(round(sum(composite_abuse_scores) / len(composite_abuse_scores)))
+
+     most_common_stage = max(set(stages), key=stages.count)
+     stage_text = RISK_STAGE_LABELS[most_common_stage]
+
+     # Derive the top label for each message
+     top_labels = []
+     for result, _ in results:
+         threshold_labels = result[1]
+         top_labels.append(threshold_labels[0] if threshold_labels else "none")
+
+     avg_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
+     darvo_blurb = ""
+     if avg_darvo > 0.25:
+         level = "moderate" if avg_darvo < 0.65 else "high"
+         darvo_blurb = f"\n\n🎭 **DARVO Score: {avg_darvo}** → This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
+
+     out = f"Abuse Intensity: {composite_abuse}%\n"
+     out += "📊 This reflects the strength and severity of detected abuse patterns in the message(s).\n\n"
+     out += generate_risk_snippet(composite_abuse, top_labels[0], hybrid_score, most_common_stage)
+     out += f"\n\n{stage_text}"
+     out += darvo_blurb
+     out += "\n\n🎭 **Emotional Tones Detected:**\n"
+     for i, tone in enumerate(tone_tags):
+         out += f"• Message {i+1}: *{tone or 'none'}*\n"
+
+     # Immediate danger threats section
+     if flat_threats:
+         out += "\n\n🚨 **Immediate Danger Threats Detected:**\n"
+         for t in set(flat_threats):
+             out += f"• \"{t}\"\n"
+         out += "\n⚠️ These phrases may indicate an imminent risk to physical safety."
+     else:
+         out += "\n\n🧩 **Immediate Danger Threats:** None explicitly detected.\n"
+         out += "This does *not* rule out risk, but no direct threat phrases were matched."
+
+     pattern_labels = [
+         pats[0][0] if (pats := r[0][2]) else "none"
+         for r in results
+     ]
+     timeline_image = generate_abuse_score_chart(dates_used, abuse_scores, pattern_labels)
+     out += "\n\n" + escalation_text
+     return out, timeline_image
+
+ textbox_inputs = [gr.Textbox(label=f"Message {i+1}") for i in range(3)]
+ quiz_boxes = [gr.Checkbox(label=q) for q, _ in ESCALATION_QUESTIONS]
+ none_box = gr.Checkbox(label="None of the above")
+
+
+ # ─── Final "force launch" (no guards) ────────────────────────────
+
+ demo = gr.Interface(
+     fn=analyze_composite,
+     inputs=textbox_inputs + quiz_boxes + [none_box],
+     outputs=[
+         gr.Textbox(label="Results"),
+         gr.Image(label="Abuse Score Timeline", type="pil")
+     ],
+     title="Abuse Pattern Detector + Escalation Quiz",
+     description=(
+         "Enter up to three messages that concern you. "
+         "For the most accurate results, include messages from a recent emotionally intense period."
+     ),
+     flagging_mode="manual"
+ )
+
+ # This single call starts the server and blocks,
+ # keeping the container alive on Spaces.
+ demo.launch()