SamanthaStorm committed on
Commit
68ee468
·
verified ·
1 Parent(s): e345a71

Update app.py

Files changed (1)
  1. app.py +561 -734
app.py CHANGED
@@ -1,9 +1,11 @@
 import gradio as gr
- import spaces
 import torch
 import numpy as np
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
- from motif_tagging import detect_motifs
 import re
 import matplotlib.pyplot as plt
 import io
@@ -11,18 +13,71 @@ from PIL import Image
 from datetime import datetime
 from torch.nn.functional import sigmoid
 from collections import Counter

- # ─── Abuse Model ─────────────────────────────────
- model_name = "SamanthaStorm/tether-multilabel-v4"
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
- tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
-
 LABELS = [
- "recovery", "control", "gaslighting", "guilt tripping", "dismissiveness", "blame shifting",
- "nonabusive","projection", "insults", "contradictory statements", "obscure language"
 ]

 THRESHOLDS = {
 "recovery": 0.4,
 "control": 0.45,
@@ -64,767 +119,539 @@ ESCALATION_QUESTIONS = [
 ("Partner monitors your calls/GPS/social media", 2)
 ]

- # ─── Escalation Risk Mapping ────────────────────
- ESCALATION_QUESTIONS = [
- ("Partner has access to firearms or weapons", 4),
- ("Partner threatened to kill you", 3),
- ("Partner threatened you with a weapon", 3),
- ("Partner has ever choked you, even if you considered it consensual at the time", 4),
- ("Partner injured or threatened your pet(s)", 3),
- ("Partner has broken your things, punched or kicked walls, or thrown things ", 2),
- ("Partner forced or coerced you into unwanted sexual acts", 3),
- ("Partner threatened to take away your children", 2),
- ("Violence has increased in frequency or severity", 3),
- ("Partner monitors your calls/GPS/social media", 2)
- ]
-
- # ─── Escalation Risk Mapping ────────────────────
- ESCALATION_RISKS = {
- "blame shifting": "low",
- "contradictory statements": "moderate",
- "control": "high",
- "dismissiveness": "moderate",
- "gaslighting": "moderate",
- "guilt tripping": "moderate",
- "insults": "moderate",
- "obscure language": "low",
- "projection": "low",
- "recovery phase": "low"
- }
-
- # ─── Risk Stage Labels ──────────────────────────
- # ─── Risk Stage Labels ──────────────────────────
 RISK_STAGE_LABELS = {
- 1: "🌀 Risk Stage: Tension-Building\n"
- "This message reflects rising emotional pressure or subtle control attempts.",
- 2: "🔥 Risk Stage: Escalation\n"
- "This message includes direct or aggressive patterns, suggesting active harm.",
- 3: "🌧️ Risk Stage: Reconciliation\n"
- "This message reflects a reset attempt—apologies or emotional repair without accountability.",
- 4: "🌸 Risk Stage: Calm / Honeymoon\n"
- "This message appears supportive but may follow prior harm, minimizing it."
 }

- # ─── Immediate Threat Motifs ────────────────────
 THREAT_MOTIFS = [
- "i'll kill you", "i’m going to hurt you", "you’re dead", "you won't survive this",
- "i’ll break your face", "i'll bash your head in", "i’ll snap your neck",
- "i’ll come over there and make you shut up", "i'll knock your teeth out",
- "you’re going to bleed", "you want me to hit you?", "i won’t hold back next time",
- "i swear to god i’ll beat you", "next time, i won’t miss", "i’ll make you scream",
- "i know where you live", "i'm outside", "i’ll be waiting", "i saw you with him",
- "you can’t hide from me", "i’m coming to get you", "i'll find you", "i know your schedule",
- "i watched you leave", "i followed you home", "you'll regret this", "you’ll be sorry",
- "you’re going to wish you hadn’t", "you brought this on yourself", "don’t push me",
- "you have no idea what i’m capable of", "you better watch yourself",
- "i don’t care what happens to you anymore", "i’ll make you suffer", "you’ll pay for this",
- "i’ll never let you go", "you’re nothing without me", "if you leave me, i’ll kill myself",
- "i'll ruin you", "i'll tell everyone what you did", "i’ll make sure everyone knows",
- "i’m going to destroy your name", "you’ll lose everyone", "i’ll expose you",
- "your friends will hate you", "i’ll post everything", "you’ll be cancelled",
- "you’ll lose everything", "i’ll take the house", "i’ll drain your account",
- "you’ll never see a dime", "you’ll be broke when i’m done", "i’ll make sure you lose your job",
- "i’ll take your kids", "i’ll make sure you have nothing", "you can’t afford to leave me",
- "don't make me do this", "you know what happens when i’m mad", "you’re forcing my hand",
- "if you just behaved, this wouldn’t happen", "this is your fault",
- "you’re making me hurt you", "i warned you", "you should have listened"
 ]

-
- # New Tone & Sentiment Models
- tone_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tone-tag-multilabel-v1", use_fast=False)
- tone_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tone-tag-multilabel-v1")
- TONE_LABELS = [
- "cold invalidation", "coercive warmth", "contradictory gaslight",
- "deflective hostility", "emotional instability", "nonabusive",
- "performative regret", "emotional threat", "forced accountability flip"
- ]
-
- sentiment_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-sentiment", use_fast=False)
- sentiment_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-sentiment")
- SENTIMENT_LABELS = ["undermining", "supportive"]
-
-
- # ─── DARVO Model ────────────────────────────────
- darvo_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-darvo-regressor-v1")
- darvo_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-darvo-regressor-v1", use_fast=False)
- darvo_model.eval()
-
 def predict_darvo_score(text):
- inputs = darvo_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
- with torch.no_grad():
- logits = darvo_model(**inputs).logits
- return round(sigmoid(logits).item(), 4)

 def detect_weapon_language(text):
- weapon_keywords = ["knife","gun","bomb","weapon","kill","stab"]
 t = text.lower()
 return any(w in t for w in weapon_keywords)
 
- # ─── Updated Risk Stage Logic ───────────────────
- RISK_STAGE_LABELS = {
- 1: "🌀 Risk Stage: Tension-Building\nThis message reflects rising emotional pressure or subtle control attempts.",
- 2: "🔥 Risk Stage: Escalation\nThis message includes direct or aggressive patterns, suggesting active harm.",
- 3: "🌧️ Risk Stage: Reconciliation\nThis message reflects a reset attempt—apologies or emotional repair without accountability.",
- 4: "🌸 Risk Stage: Calm / Honeymoon\nThis message appears supportive but may follow prior harm, minimizing it."
- }
-
 def get_risk_stage(patterns, sentiment):
- if "insults" in patterns:
- return 2
- elif "recovery" in patterns:
- return 3
- elif "control" in patterns or "guilt tripping" in patterns:
 return 1
- elif sentiment == "supportive" and any(p in patterns for p in ["projection", "dismissiveness"]):
- return 4
- return 1
-
-
- # ─── Emotion & Tone Removed (unneeded) ──────────
- # (Emotion model block removed)

- # ─── Replace get_emotional_tone_tag ─────────────
 def get_emotional_tone_tag(text, emotions, sentiment, patterns, abuse_score):
- inputs = tone_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
- with torch.no_grad():
- logits = tone_model(**inputs).logits[0]
- probs = torch.sigmoid(logits).cpu().numpy()
- scores = dict(zip(TONE_LABELS, np.round(probs, 3)))
- return max(scores, key=scores.get)
-
 
- @spaces.GPU
 def compute_abuse_score(matched_scores, sentiment):
- """
- Compute abuse score from matched patterns and sentiment
- """
- if not matched_scores:
- return 0.0
-
- # Calculate weighted score
- total_weight = sum(weight for _, _, weight in matched_scores)
- if total_weight == 0:
- return 0.0
-
- # Get highest pattern scores
- pattern_scores = [(label, score) for label, score, _ in matched_scores]
- sorted_scores = sorted(pattern_scores, key=lambda x: x[1], reverse=True)
-
- # Base score calculation
- weighted_sum = sum(score * weight for _, score, weight in matched_scores)
- base_score = (weighted_sum / total_weight) * 100
-
- # Pattern combination multipliers
- if len(matched_scores) >= 3: # Multiple patterns detected
- base_score *= 1.2 # 20% increase for pattern combinations
-
- # High severity patterns
- high_severity_patterns = {'gaslighting', 'control', 'blame shifting'}
- if any(label in high_severity_patterns for label, _, _ in matched_scores):
- base_score *= 1.15 # 15% increase for high severity patterns
-
- # Pattern strength boosters
- if any(score > 0.6 for _, score, _ in matched_scores): # Any pattern > 60%
- base_score *= 1.1 # 10% increase for strong patterns
-
- # Multiple high scores
- high_scores = len([score for _, score, _ in matched_scores if score > 0.5])
- if high_scores >= 2:
- base_score *= 1.15 # 15% increase for multiple high scores
-
- # Apply sentiment modifier
- if sentiment == "supportive":
- # Less reduction for supportive sentiment when high severity patterns present
 if any(label in high_severity_patterns for label, _, _ in matched_scores):
- base_score *= 0.9 # Only 10% reduction
- else:
- base_score *= 0.85 # Normal 15% reduction
- elif sentiment == "undermining":
- base_score *= 1.15 # 15% increase for undermining sentiment
-
- # Ensure minimum score for strong patterns
- if any(score > 0.6 for _, score, _ in matched_scores):
- base_score = max(base_score, 65.0)
-
- # Cap maximum score
- return min(round(base_score, 1), 100.0)
-
 
 def analyze_single_message(text, thresholds):
- print("\n=== DEBUG START ===")
- print(f"Input text: {text}")
-
- if not text.strip():
- print("Empty text, returning zeros")
- return 0.0, [], [], {"label": "none"}, 1, 0.0, None
-
- # Check for explicit abuse
- explicit_abuse_words = ['fuck', 'bitch', 'shit', 'ass', 'dick']
- explicit_abuse = any(word in text.lower() for word in explicit_abuse_words)
- print(f"Explicit abuse detected: {explicit_abuse}")
-
- # Abuse model inference
- inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
- with torch.no_grad():
- outputs = model(**inputs)
- raw_scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
-
- # Print raw model outputs
- print("\nRaw model scores:")
- for label, score in zip(LABELS, raw_scores):
- print(f"{label}: {score:.3f}")
-
- # Get predictions and sort them
- predictions = list(zip(LABELS, raw_scores))
- sorted_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)
- print("\nTop 3 predictions:")
- for label, score in sorted_predictions[:3]:
- print(f"{label}: {score:.3f}")
-
- # Apply thresholds
- threshold_labels = []
- if explicit_abuse:
- threshold_labels.append("insults")
- print("\nForced inclusion of 'insults' due to explicit abuse")
-
- for label, score in sorted_predictions:
- base_threshold = thresholds.get(label, 0.25)
 if explicit_abuse:
- base_threshold *= 0.5
- if score > base_threshold:
- if label not in threshold_labels: # Avoid duplicates
- threshold_labels.append(label)
-
- print("\nLabels that passed thresholds:", threshold_labels)
-
- # Calculate matched scores
- matched_scores = []
- for label in threshold_labels:
- score = raw_scores[LABELS.index(label)]
- weight = PATTERN_WEIGHTS.get(label, 1.0)
- if explicit_abuse and label == "insults":
- weight *= 1.5
- matched_scores.append((label, score, weight))
-
- print("\nMatched scores (label, score, weight):", matched_scores)
-
- # Calculate abuse score
- if not matched_scores:
- print("No matched scores, returning 0")
- return 0.0, [], [], {"label": "undermining"}, 2 if explicit_abuse else 1, 0.0, None
-
- weighted_sum = sum(score * weight for _, score, weight in matched_scores)
- total_weight = sum(weight for _, _, weight in matched_scores)
- abuse_score = (weighted_sum / total_weight) * 100
-
- if explicit_abuse:
- abuse_score = max(abuse_score, 70.0)
-
- print(f"\nCalculated abuse score: {abuse_score}")
-
- # Get sentiment
- sent_inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
- with torch.no_grad():
- sent_logits = sentiment_model(**sent_inputs).logits[0]
- sent_probs = torch.softmax(sent_logits, dim=-1).cpu().numpy()
- sentiment = SENTIMENT_LABELS[int(np.argmax(sent_probs))]
- print(f"\nDetected sentiment: {sentiment}")
-
- # Get tone
- tone_inputs = tone_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
- with torch.no_grad():
- tone_logits = tone_model(**tone_inputs).logits[0]
- tone_probs = torch.sigmoid(tone_logits).cpu().numpy()
- tone_tag = TONE_LABELS[int(np.argmax(tone_probs))]
- print(f"Detected tone: {tone_tag}")
-
- # Get DARVO score
- darvo_score = predict_darvo_score(text)
- print(f"DARVO score: {darvo_score}")
-
- # Set stage
- stage = 2 if explicit_abuse or abuse_score > 70 else 1
- print(f"Final stage: {stage}")
-
- print("=== DEBUG END ===\n")
-
- return abuse_score, threshold_labels, matched_scores, {"label": sentiment}, stage, darvo_score, tone_tag

 
- def generate_risk_snippet(abuse_score, patterns, hybrid_score, stage):
- """
- Enhanced risk assessment generator with more nuanced scoring and pattern analysis
-
- Parameters:
- - abuse_score: float (0-100)
- - patterns: list of detected abuse patterns
- - hybrid_score: float (combined escalation/risk score)
- - stage: int (1-4 representing relationship stages)
- """
-
- # Define risk thresholds with more granular levels
- def get_risk_level(abuse_score, hybrid_score, patterns):
- if abuse_score >= 85 or hybrid_score >= 20:
- return "Critical"
- elif abuse_score >= 70 or hybrid_score >= 15:
- return "High"
- elif abuse_score >= 50 or hybrid_score >= 10:
- return "Moderate"
- return "Low"
-
- # Pattern severity weights
- PATTERN_SEVERITY = {
- "control": 3,
- "gaslighting": 3,
- "insults": 2,
- "blame shifting": 2,
- "guilt tripping": 2,
- "dismissiveness": 1,
- "projection": 1,
- "contradictory statements": 1,
- "obscure language": 1,
- "recovery": 1
- }
-
- # Calculate weighted pattern severity
- pattern_severity = sum(PATTERN_SEVERITY.get(p, 0) for p in patterns)
-
- # Get base risk level
- risk_level = get_risk_level(abuse_score, hybrid_score, patterns)
-
- # Generate risk descriptions with more detailed context
- risk_descriptions = {
- "Critical": (
- "🚨 **Risk Level: Critical**\n"
- f"Multiple severe abuse patterns detected (Score: {abuse_score:.1f}%). "
- "This situation shows signs of dangerous escalation and immediate intervention may be needed."
- ),
- "High": (
- "⚠️ **Risk Level: High**\n"
- f"Strong abuse patterns detected (Score: {abuse_score:.1f}%). "
- "This situation shows concerning signs of manipulation and control."
- ),
- "Moderate": (
- "⚡ **Risk Level: Moderate**\n"
- f"Concerning patterns detected (Score: {abuse_score:.1f}%). "
- "While not severe, these behaviors indicate unhealthy relationship dynamics."
- ),
- "Low": (
- "📝 **Risk Level: Low**\n"
- f"Minor concerning patterns detected (Score: {abuse_score:.1f}%). "
- "While present, the detected behaviors are subtle or infrequent."
- )
- }
-
- # Add stage-specific context
- stage_context = {
- 1: "Current patterns suggest a tension-building phase.",
- 2: "Messages show signs of active escalation.",
- 3: "Patterns indicate attempted reconciliation without real change.",
- 4: "Surface calm may mask underlying issues."
- }
-
- # Build output
- output = risk_descriptions[risk_level]
- if stage in stage_context:
- output += f"\n{stage_context[stage]}"
-
- # Add pattern analysis if patterns detected
- if patterns:
- output += "\n\n🔍 **Detected Patterns:**"
- for pattern in patterns:
- severity = PATTERN_SEVERITY.get(pattern, 0)
- output += f"\n• {pattern.title()} (Severity: {'❗' * severity})"
-
- # Add safety recommendations based on risk level
- if risk_level in ["Critical", "High"]:
- output += "\n\n⚠️ **Safety Recommendations:**"
- output += "\n• Consider reaching out to a domestic violence hotline"
- output += "\n• Document all concerning interactions"
- output += "\n• Have a safety plan in place"
-
- return output

 
 def generate_abuse_score_chart(dates, scores, patterns):
- """
- Generate a timeline chart of abuse scores
- """
- plt.figure(figsize=(10, 6))
- plt.clf()
-
- # Create new figure
- fig, ax = plt.subplots(figsize=(10, 6))
-
- # Plot points and lines
- x = range(len(scores))
- plt.plot(x, scores, 'bo-', linewidth=2, markersize=8)
-
- # Add labels for each point
- for i, (score, pattern) in enumerate(zip(scores, patterns)):
- plt.annotate(
- f'{pattern}\n{score:.0f}%',
- (i, score),
- textcoords="offset points",
- xytext=(0, 10),
- ha='center',
- bbox=dict(
- boxstyle='round,pad=0.5',
- fc='white',
- ec='gray',
- alpha=0.8
 )
- )
-
- # Customize the plot
- plt.ylim(-5, 105)
- plt.grid(True, linestyle='--', alpha=0.7)
- plt.title('Abuse Pattern Timeline', pad=20, fontsize=12)
- plt.ylabel('Abuse Score %')
-
- # X-axis labels
- plt.xticks(x, dates, rotation=45)
-
- # Risk level bands with better colors
- plt.axhspan(0, 50, color='#90EE90', alpha=0.2) # light green
- plt.axhspan(50, 70, color='#FFD700', alpha=0.2) # gold
- plt.axhspan(70, 85, color='#FFA500', alpha=0.2) # orange
- plt.axhspan(85, 100, color='#FF6B6B', alpha=0.2) # light red
-
- # Add risk level labels
- plt.text(-0.2, 25, 'Low Risk', rotation=90, va='center')
- plt.text(-0.2, 60, 'Moderate Risk', rotation=90, va='center')
- plt.text(-0.2, 77.5, 'High Risk', rotation=90, va='center')
- plt.text(-0.2, 92.5, 'Critical Risk', rotation=90, va='center')
-
- # Adjust layout
- plt.tight_layout()
-
- # Convert plot to image
- buf = io.BytesIO()
- plt.savefig(buf, format='png', bbox_inches='tight')
- buf.seek(0)
- plt.close('all') # Close all figures to prevent memory leaks
-
- return Image.open(buf)
-
 
 def analyze_composite(msg1, msg2, msg3, *answers_and_none):
- from collections import Counter
-
- none_selected_checked = answers_and_none[-1]
- responses_checked = any(answers_and_none[:-1])
- none_selected = not responses_checked and none_selected_checked
-
- if none_selected:
- escalation_score = 0
- escalation_note = "Checklist completed: no danger items reported."
- escalation_completed = True
- elif responses_checked:
- escalation_score = sum(w for (_, w), a in zip(ESCALATION_QUESTIONS, answers_and_none[:-1]) if a)
- escalation_note = "Checklist completed."
- escalation_completed = True
- else:
- escalation_score = None
- escalation_note = "Checklist not completed."
- escalation_completed = False
-
- messages = [msg1, msg2, msg3]
- active = [(m, f"Message {i+1}") for i, m in enumerate(messages) if m.strip()]
- if not active:
- return "Please enter at least one message.", None

- # Flag any threat phrases present in the messages
- import re
-
- def normalize(text):
- import unicodedata
- text = text.lower().strip()
- text = unicodedata.normalize("NFKD", text) # handles curly quotes
- text = text.replace("’", "'") # smart to straight
- return re.sub(r"[^a-z0-9 ]", "", text)
-
- def detect_threat_motifs(message, motif_list):
- norm_msg = normalize(message)
- return [
- motif for motif in motif_list
- if normalize(motif) in norm_msg
- ]
-
- # Collect matches per message
- immediate_threats = [detect_threat_motifs(m, THREAT_MOTIFS) for m, _ in active]
- flat_threats = [t for sublist in immediate_threats for t in sublist]
- threat_risk = "Yes" if flat_threats else "No"
- results = [(analyze_single_message(m, THRESHOLDS.copy()), d) for m, d in active]
-
- abuse_scores = [r[0][0] for r in results]
- stages = [r[0][4] for r in results]
- darvo_scores = [r[0][5] for r in results]
- tone_tags = [r[0][6] for r in results]
- dates_used = [r[1] for r in results]
-
- predicted_labels = [label for r in results for label in r[0][1]] # Use threshold_labels instead
- high = {'control'}
- moderate = {'gaslighting', 'dismissiveness', 'obscure language', 'insults', 'contradictory statements', 'guilt tripping'}
- low = {'blame shifting', 'projection', 'recovery phase'}
- counts = {'high': 0, 'moderate': 0, 'low': 0}
- for label in predicted_labels:
- if label in high:
- counts['high'] += 1
- elif label in moderate:
- counts['moderate'] += 1
- elif label in low:
- counts['low'] += 1
-
- # Pattern escalation logic
- pattern_escalation_risk = "Low"
- if counts['high'] >= 2 and counts['moderate'] >= 2:
- pattern_escalation_risk = "Critical"
- elif (counts['high'] >= 2 and counts['moderate'] >= 1) or (counts['moderate'] >= 3) or (counts['high'] >= 1 and counts['moderate'] >= 2):
- pattern_escalation_risk = "High"
- elif (counts['moderate'] == 2) or (counts['high'] == 1 and counts['moderate'] == 1) or (counts['moderate'] == 1 and counts['low'] >= 2) or (counts['high'] == 1 and sum(counts.values()) == 1):
- pattern_escalation_risk = "Moderate"
-
- checklist_escalation_risk = "Unknown" if escalation_score is None else (
- "Critical" if escalation_score >= 20 else
- "Moderate" if escalation_score >= 10 else
- "Low"
- )
-
- escalation_bump = 0
- for result, _ in results:
- abuse_score, _, _, sentiment, stage, darvo_score, tone_tag = result
- if darvo_score > 0.65:
- escalation_bump += 3
- if tone_tag in ["forced accountability flip", "emotional threat"]:
- escalation_bump += 2
- if abuse_score > 80:
- escalation_bump += 2
- if stage == 2:
- escalation_bump += 3
-
- def rank(label):
- return {"Low": 0, "Moderate": 1, "High": 2, "Critical": 3, "Unknown": 0}.get(label, 0)
-
- combined_score = rank(pattern_escalation_risk) + rank(checklist_escalation_risk) + escalation_bump
- escalation_risk = (
- "Critical" if combined_score >= 6 else
- "High" if combined_score >= 4 else
- "Moderate" if combined_score >= 2 else
- "Low"
- )

- none_selected_checked = answers_and_none[-1]
- responses_checked = any(answers_and_none[:-1])
- none_selected = not responses_checked and none_selected_checked
-
- # Determine escalation_score
- if none_selected:
- escalation_score = 0
- escalation_completed = True
- elif responses_checked:
- escalation_score = sum(
- w for (_, w), a in zip(ESCALATION_QUESTIONS, answers_and_none[:-1]) if a
- )
- escalation_completed = True
- else:
- escalation_score = None
- escalation_completed = False
-
- # Build escalation_text and hybrid_score
- if escalation_score is None:
- escalation_text = (
- "🚫 **Escalation Potential: Unknown** (Checklist not completed)\n"
- "⚠️ This section was not completed. Escalation potential is estimated using message data only.\n"
- )
- hybrid_score = 0
- elif escalation_score == 0:
- escalation_text = (
- "✅ **Escalation Checklist Completed:** No danger items reported.\n"
- "🧭 **Escalation potential estimated from detected message patterns only.**\n"
- f"• Pattern Risk: {pattern_escalation_risk}\n"
- f"• Checklist Risk: None reported\n"
- f"• Escalation Bump: +{escalation_bump} (from DARVO, tone, intensity, etc.)"
- )
- hybrid_score = escalation_bump
- else:
- hybrid_score = escalation_score + escalation_bump
- escalation_text = (
- f"📈 **Escalation Potential: {escalation_risk} ({hybrid_score}/29)**\n"
- "📋 This score combines your safety checklist answers *and* detected high-risk behavior.\n"
- f"• Pattern Risk: {pattern_escalation_risk}\n"
- f"• Checklist Risk: {checklist_escalation_risk}\n"
- f"• Escalation Bump: +{escalation_bump} (from DARVO, tone, intensity, etc.)"
 )
- # Composite Abuse Score
- composite_abuse_scores = []
- for result, _ in results:
- abuse_score, _, matched_scores, sentiment, _, _, _ = result
- composite_abuse_scores.append(abuse_score) # Just use the already calculated abuse score
- composite_abuse = int(round(sum(composite_abuse_scores) / len(composite_abuse_scores)))
-
- most_common_stage = max(set(stages), key=stages.count)
- stage_text = RISK_STAGE_LABELS[most_common_stage]
-
- # Derive top label list for each message
- top_labels = []
- for result, _ in results:
- threshold_labels = result[1] # Get threshold_labels from result
- if threshold_labels: # If we have threshold labels
- top_labels.append(threshold_labels[0]) # Add the first one
- else:
- top_labels.append("none") # Default if no labels
-
- avg_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
- darvo_blurb = ""
- if avg_darvo > 0.25:
- level = "moderate" if avg_darvo < 0.65 else "high"
- darvo_blurb = f"\n\n🎭 **DARVO Score: {avg_darvo}** → This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
-
- out = f"Abuse Intensity: {composite_abuse}%\n"
- out += "📊 This reflects the strength and severity of detected abuse patterns in the message(s).\n\n"
- out += generate_risk_snippet(composite_abuse, top_labels[0], hybrid_score, most_common_stage)
- out += f"\n\n{stage_text}"
- out += darvo_blurb
- out += "\n\n🎭 **Emotional Tones Detected:**\n"
- for i, tone in enumerate(tone_tags):
- out += f"• Message {i+1}: *{tone or 'none'}*\n"
- # --- Add Immediate Danger Threats section
- if flat_threats:
- out += "\n\n🚨 **Immediate Danger Threats Detected:**\n"
- for t in set(flat_threats):
- out += f"• \"{t}\"\n"
- out += "\n⚠️ These phrases may indicate an imminent risk to physical safety."
- else:
- out += "\n\n🧩 **Immediate Danger Threats:** None explicitly detected.\n"
- out += "This does *not* rule out risk, but no direct threat phrases were matched."
- pattern_labels = [
- pats[0][0] if (pats := r[0][2]) else "none"
- for r in results
- ]
- timeline_image = generate_abuse_score_chart(dates_used, abuse_scores, pattern_labels)
- out += "\n\n" + escalation_text
- return out, timeline_image
-
- textbox_inputs = [gr.Textbox(label=f"Message {i+1}") for i in range(3)]
- quiz_boxes = [gr.Checkbox(label=q) for q, _ in ESCALATION_QUESTIONS]
- none_box = gr.Checkbox(label="None of the above")
-
-
- # ─── FINAL “FORCE LAUNCH” (no guards) ───────────
-
- demo = gr.Interface(
- fn=analyze_composite,
- inputs=textbox_inputs + quiz_boxes + [none_box],
- outputs=[
- gr.Textbox(label="Results"),
- gr.Image(label="Abuse Score Timeline", type="pil")
- ],
- title="Abuse Pattern Detector + Escalation Quiz",
- description=(
- "Enter up to three messages that concern you. "
- "For the most accurate results, include messages from a recent emotionally intense period."
- ),
- flagging_mode="manual"
- )
-
- # This single call will start the server and block,
- # keeping the container alive on Spaces.
- demo.launch()
-
- def generate_risk_snippet(abuse_score, patterns, hybrid_score, stage):
- """
- Enhanced risk assessment generator with more nuanced scoring and pattern analysis
-
- Parameters:
- - abuse_score: float (0-100)
- - patterns: list of detected abuse patterns
- - hybrid_score: float (combined escalation/risk score)
- - stage: int (1-4 representing relationship stages)
- """
-
- # Define risk thresholds with more granular levels
- def get_risk_level(abuse_score, hybrid_score, patterns):
- if abuse_score >= 85 or hybrid_score >= 20:
- return "Critical"
- elif abuse_score >= 70 or hybrid_score >= 15:
- return "High"
- elif abuse_score >= 50 or hybrid_score >= 10:
- return "Moderate"
- return "Low"
-
- # Pattern severity weights
- PATTERN_SEVERITY = {
- "control": 3,
- "gaslighting": 3,
- "insults": 2,
- "blame shifting": 2,
- "guilt tripping": 2,
- "dismissiveness": 1,
- "projection": 1,
- "contradictory statements": 1,
- "obscure language": 1,
- "recovery": 1
- }
-
- # Calculate weighted pattern severity
- pattern_severity = sum(PATTERN_SEVERITY.get(p, 0) for p in patterns)
-
- # Get base risk level
- risk_level = get_risk_level(abuse_score, hybrid_score, patterns)
-
- # Generate risk descriptions with more detailed context
- risk_descriptions = {
- "Critical": (
- "🚨 **Risk Level: Critical**\n"
- f"Multiple severe abuse patterns detected (Score: {abuse_score:.1f}%). "
- "This situation shows signs of dangerous escalation and immediate intervention may be needed."
- ),
- "High": (
- "⚠️ **Risk Level: High**\n"
- f"Strong abuse patterns detected (Score: {abuse_score:.1f}%). "
- "This situation shows concerning signs of manipulation and control."
- ),
- "Moderate": (
- "⚡ **Risk Level: Moderate**\n"
- f"Concerning patterns detected (Score: {abuse_score:.1f}%). "
- "While not severe, these behaviors indicate unhealthy relationship dynamics."
- ),
- "Low": (
- "📝 **Risk Level: Low**\n"
- f"Minor concerning patterns detected (Score: {abuse_score:.1f}%). "
- "While present, the detected behaviors are subtle or infrequent."
- )
- }
-
- # Add stage-specific context
- stage_context = {
- 1: "Current patterns suggest a tension-building phase.",
- 2: "Messages show signs of active escalation.",
- 3: "Patterns indicate attempted reconciliation without real change.",
- 4: "Surface calm may mask underlying issues."
- }
-
- # Build output
- output = risk_descriptions[risk_level]
- if stage in stage_context:
- output += f"\n{stage_context[stage]}"
-
- # Add pattern analysis if patterns detected
- if patterns:
- output += "\n\n🔍 **Detected Patterns:**"
- for pattern in patterns:
- severity = PATTERN_SEVERITY.get(pattern, 0)
- output += f"\n• {pattern.title()} (Severity: {'❗' * severity})"
-
- # Add safety recommendations based on risk level
- if risk_level in ["Critical", "High"]:
- output += "\n\n⚠️ **Safety Recommendations:**"
- output += "\n• Consider reaching out to a domestic violence hotline"
- output += "\n• Document all concerning interactions"
- output += "\n• Have a safety plan in place"
-
- return output
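
The multiplier chain inside compute_abuse_score (deleted above, re-added below inside a try/except) is easiest to sanity-check with concrete numbers. A minimal worked sketch, assuming two detected labels that both fall back to the 1.0 default of PATTERN_WEIGHTS.get(label, 1.0) (PATTERN_WEIGHTS itself is defined in context not shown in this diff) and a "supportive" sentiment:

matched_scores = [("control", 0.72, 1.0), ("gaslighting", 0.55, 1.0)]

# Weighted average of pattern scores, scaled to a percentage.
weighted_sum = sum(score * weight for _, score, weight in matched_scores)  # 1.27
total_weight = sum(weight for _, _, weight in matched_scores)              # 2.0
base_score = (weighted_sum / total_weight) * 100                           # 63.5

# Only two patterns, so the >= 3 pattern-combination multiplier (x1.2) is skipped.
base_score *= 1.15  # high-severity pattern present (control, gaslighting)      -> 73.025
base_score *= 1.1   # at least one pattern score above 0.6                      -> 80.3275
base_score *= 1.15  # two pattern scores above 0.5                              -> 92.377
base_score *= 0.9   # "supportive" sentiment, softened because severity is high -> 83.139

base_score = max(base_score, 65.0)        # strong-pattern floor (no effect here)
print(min(round(base_score, 1), 100.0))   # 83.1

The same chain explains why undermining messages with several mid-strength labels can saturate at the 100.0 cap.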
+ import os
+ os.environ['GRADIO_SERVER_NAME'] = "0.0.0.0"
+ os.environ['GRADIO_SERVER_PORT'] = "7860"
+
 import gradio as gr
 import torch
 import numpy as np
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import re
 import matplotlib.pyplot as plt
 import io
 from PIL import Image
 from datetime import datetime
 from torch.nn.functional import sigmoid
 from collections import Counter
+ import logging
+
+ # Set up logging
+ logging.basicConfig(level=logging.DEBUG)
+ logger = logging.getLogger(__name__)
+
+ # Device configuration
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+ logger.info(f"Using device: {device}")
+
+ # Model initialization with error handling
+ def load_model_and_tokenizer(model_name, model_type="main"):
+ try:
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
+ tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
+ model = model.to(device)
+ logger.info(f"Successfully loaded {model_type} model: {model_name}")
+ return model, tokenizer
+ except Exception as e:
+ logger.error(f"Error loading {model_type} model {model_name}: {e}")
+ return None, None
+
+ # Load all models
+ try:
+ # Main abuse model
+ model_name = "SamanthaStorm/tether-multilabel-v4"
+ model, tokenizer = load_model_and_tokenizer(model_name, "main")
+
+ # Tone model
+ tone_model, tone_tokenizer = load_model_and_tokenizer(
+ "SamanthaStorm/tone-tag-multilabel-v1", "tone"
+ )
+
+ # Sentiment model
+ sentiment_model, sentiment_tokenizer = load_model_and_tokenizer(
+ "SamanthaStorm/tether-sentiment", "sentiment"
+ )
+
+ # DARVO model
+ darvo_model, darvo_tokenizer = load_model_and_tokenizer(
+ "SamanthaStorm/tether-darvo-regressor-v1", "darvo"
+ )
+
+ if darvo_model:
+ darvo_model.eval()
+
+ except Exception as e:
+ logger.error(f"Error during model initialization: {e}")
+ raise
+
+ # Constants and Labels
 LABELS = [
+ "recovery", "control", "gaslighting", "guilt tripping", "dismissiveness",
+ "blame shifting", "nonabusive", "projection", "insults",
+ "contradictory statements", "obscure language"
+ ]
+
+ TONE_LABELS = [
+ "cold invalidation", "coercive warmth", "contradictory gaslight",
+ "deflective hostility", "emotional instability", "nonabusive",
+ "performative regret", "emotional threat", "forced accountability flip"
 ]

+ SENTIMENT_LABELS = ["undermining", "supportive"]
+
 THRESHOLDS = {
 "recovery": 0.4,
 "control": 0.45,

 ("Partner monitors your calls/GPS/social media", 2)
 ]

 RISK_STAGE_LABELS = {
+ 1: "🌀 Risk Stage: Tension-Building\nThis message reflects rising emotional pressure or subtle control attempts.",
+ 2: "🔥 Risk Stage: Escalation\nThis message includes direct or aggressive patterns, suggesting active harm.",
+ 3: "🌧️ Risk Stage: Reconciliation\nThis message reflects a reset attempt—apologies or emotional repair without accountability.",
+ 4: "🌸 Risk Stage: Calm / Honeymoon\nThis message appears supportive but may follow prior harm, minimizing it."
 }

+ # Threat Motifs
  THREAT_MOTIFS = [
+ "i'll kill you", "i'm going to hurt you", "you're dead", "you won't survive this",
+ "i'll break your face", "i'll bash your head in", "i'll snap your neck",
+ "i'll come over there and make you shut up", "i'll knock your teeth out",
+ "you're going to bleed", "you want me to hit you?", "i won't hold back next time",
+ "i swear to god i'll beat you", "next time, i won't miss", "i'll make you scream",
+ "i know where you live", "i'm outside", "i'll be waiting", "i saw you with him",
+ "you can't hide from me", "i'm coming to get you", "i'll find you", "i know your schedule",
+ "i watched you leave", "i followed you home", "you'll regret this", "you'll be sorry",
+ "you're going to wish you hadn't", "you brought this on yourself", "don't push me",
+ "you have no idea what i'm capable of", "you better watch yourself",
+ "i don't care what happens to you anymore", "i'll make you suffer", "you'll pay for this",
+ "i'll never let you go", "you're nothing without me", "if you leave me, i'll kill myself",
+ "i'll ruin you", "i'll tell everyone what you did", "i'll make sure everyone knows",
+ "i'm going to destroy your name", "you'll lose everyone", "i'll expose you",
+ "your friends will hate you", "i'll post everything", "you'll be cancelled",
+ "you'll lose everything", "i'll take the house", "i'll drain your account",
+ "you'll never see a dime", "you'll be broke when i'm done", "i'll make sure you lose your job",
+ "i'll take your kids", "i'll make sure you have nothing", "you can't afford to leave me",
+ "don't make me do this", "you know what happens when i'm mad", "you're forcing my hand",
+ "if you just behaved, this wouldn't happen", "this is your fault",
+ "you're making me hurt you", "i warned you", "you should have listened"
 ]

 def predict_darvo_score(text):
+ """Predict DARVO score for given text"""
+ try:
+ inputs = darvo_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+ inputs = {k: v.to(device) for k, v in inputs.items()}
+ with torch.no_grad():
+ logits = darvo_model(**inputs).logits
+ return round(sigmoid(logits.cpu()).item(), 4)
+ except Exception as e:
+ logger.error(f"Error in DARVO prediction: {e}")
+ return 0.0

 def detect_weapon_language(text):
+ """Detect weapon-related language in text"""
+ weapon_keywords = ["knife", "gun", "bomb", "weapon", "kill", "stab"]
 t = text.lower()
 return any(w in t for w in weapon_keywords)

  def get_risk_stage(patterns, sentiment):
+ """Determine risk stage based on patterns and sentiment"""
+ try:
+ if "insults" in patterns:
+ return 2
+ elif "recovery" in patterns:
+ return 3
+ elif "control" in patterns or "guilt tripping" in patterns:
+ return 1
+ elif sentiment == "supportive" and any(p in patterns for p in ["projection", "dismissiveness"]):
+ return 4
+ return 1
+ except Exception as e:
+ logger.error(f"Error determining risk stage: {e}")
 return 1

  def get_emotional_tone_tag(text, emotions, sentiment, patterns, abuse_score):
+ """Get emotional tone tag for text"""
+ try:
+ inputs = tone_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+ inputs = {k: v.to(device) for k, v in inputs.items()}
+ with torch.no_grad():
+ logits = tone_model(**inputs).logits[0]
+ probs = torch.sigmoid(logits).cpu().numpy()
+ scores = dict(zip(TONE_LABELS, np.round(probs, 3)))
+ return max(scores, key=scores.get)
+ except Exception as e:
+ logger.error(f"Error in emotional tone analysis: {e}")
+ return "unknown"

  def compute_abuse_score(matched_scores, sentiment):
+ """Compute abuse score from matched patterns and sentiment"""
+ try:
+ if not matched_scores:
+ return 0.0
+
+ total_weight = sum(weight for _, _, weight in matched_scores)
+ if total_weight == 0:
+ return 0.0
+
+ pattern_scores = [(label, score) for label, score, _ in matched_scores]
+ sorted_scores = sorted(pattern_scores, key=lambda x: x[1], reverse=True)
+
+ weighted_sum = sum(score * weight for _, score, weight in matched_scores)
+ base_score = (weighted_sum / total_weight) * 100
+
+ # Apply multipliers
+ if len(matched_scores) >= 3:
+ base_score *= 1.2
+
+ high_severity_patterns = {'gaslighting', 'control', 'blame shifting'}
 if any(label in high_severity_patterns for label, _, _ in matched_scores):
+ base_score *= 1.15
+
+ if any(score > 0.6 for _, score, _ in matched_scores):
+ base_score *= 1.1
+
+ high_scores = len([score for _, score, _ in matched_scores if score > 0.5])
+ if high_scores >= 2:
+ base_score *= 1.15
+
+ # Apply sentiment modifiers
+ if sentiment == "supportive":
+ if any(label in high_severity_patterns for label, _, _ in matched_scores):
+ base_score *= 0.9
+ else:
+ base_score *= 0.85
+ elif sentiment == "undermining":
+ base_score *= 1.15
+
+ if any(score > 0.6 for _, score, _ in matched_scores):
+ base_score = max(base_score, 65.0)
+
+ return min(round(base_score, 1), 100.0)
+ except Exception as e:
+ logger.error(f"Error computing abuse score: {e}")
+ return 0.0

  def analyze_single_message(text, thresholds):
+ """Analyze a single message for abuse patterns"""
+ logger.debug("\n=== DEBUG START ===")
+ logger.debug(f"Input text: {text}")
+
+ try:
+ if not text.strip():
+ logger.debug("Empty text, returning zeros")
+ return 0.0, [], [], {"label": "none"}, 1, 0.0, None
+
+ # Check for explicit abuse
+ explicit_abuse_words = ['fuck', 'bitch', 'shit', 'ass', 'dick']
+ explicit_abuse = any(word in text.lower() for word in explicit_abuse_words)
+ logger.debug(f"Explicit abuse detected: {explicit_abuse}")
+
+ # Abuse model inference
+ inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+ inputs = {k: v.to(device) for k, v in inputs.items()}
+
+ with torch.no_grad():
+ outputs = model(**inputs)
+ raw_scores = torch.sigmoid(outputs.logits.squeeze(0)).cpu().numpy()
+
+ # Log raw model outputs
+ logger.debug("\nRaw model scores:")
+ for label, score in zip(LABELS, raw_scores):
+ logger.debug(f"{label}: {score:.3f}")
+
+ # Get predictions and sort them
+ predictions = list(zip(LABELS, raw_scores))
+ sorted_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)
+ logger.debug("\nTop 3 predictions:")
+ for label, score in sorted_predictions[:3]:
+ logger.debug(f"{label}: {score:.3f}")
+
+ # Apply thresholds
+ threshold_labels = []
 if explicit_abuse:
+ threshold_labels.append("insults")
+ logger.debug("\nForced inclusion of 'insults' due to explicit abuse")
+
+ for label, score in sorted_predictions:
+ base_threshold = thresholds.get(label, 0.25)
+ if explicit_abuse:
+ base_threshold *= 0.5
+ if score > base_threshold:
+ if label not in threshold_labels: # Avoid duplicates
+ threshold_labels.append(label)
+
+ logger.debug(f"\nLabels that passed thresholds: {threshold_labels}")
+
+ # Calculate matched scores
+ matched_scores = []
+ for label in threshold_labels:
+ score = raw_scores[LABELS.index(label)]
+ weight = PATTERN_WEIGHTS.get(label, 1.0)
+ if explicit_abuse and label == "insults":
+ weight *= 1.5
+ matched_scores.append((label, score, weight))
+
+ logger.debug(f"\nMatched scores (label, score, weight): {matched_scores}")
+
+ # Get sentiment
+ sent_inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+ sent_inputs = {k: v.to(device) for k, v in sent_inputs.items()}
+ with torch.no_grad():
+ sent_logits = sentiment_model(**sent_inputs).logits[0]
+ sent_probs = torch.softmax(sent_logits, dim=-1).cpu().numpy()
+ sentiment = SENTIMENT_LABELS[int(np.argmax(sent_probs))]
+ logger.debug(f"\nDetected sentiment: {sentiment}")
+
+ # Get tone
+ tone_inputs = tone_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+ tone_inputs = {k: v.to(device) for k, v in tone_inputs.items()}
+ with torch.no_grad():
+ tone_logits = tone_model(**tone_inputs).logits[0]
+ tone_probs = torch.sigmoid(tone_logits).cpu().numpy()
+ tone_tag = TONE_LABELS[int(np.argmax(tone_probs))]
+ logger.debug(f"Detected tone: {tone_tag}")
+
+ # Get DARVO score
+ darvo_score = predict_darvo_score(text)
+ logger.debug(f"DARVO score: {darvo_score}")
+
+ # Calculate abuse score
+ if not matched_scores:
+ logger.debug("No matched scores, returning 0")
+ return 0.0, [], [], {"label": "undermining"}, 2 if explicit_abuse else 1, 0.0, None
+
+ abuse_score = compute_abuse_score(matched_scores, sentiment)
+
+ if explicit_abuse:
+ abuse_score = max(abuse_score, 70.0)
+
+ logger.debug(f"\nCalculated abuse score: {abuse_score}")
+
+ # Set stage
+ stage = 2 if explicit_abuse or abuse_score > 70 else 1
+ logger.debug(f"Final stage: {stage}")
+
+ logger.debug("=== DEBUG END ===\n")
+
+ return abuse_score, threshold_labels, matched_scores, {"label": sentiment}, stage, darvo_score, tone_tag
+
+ except Exception as e:
+ logger.error(f"Error in analyze_single_message: {e}")
+ return 0.0, [], [], {"label": "error"}, 1, 0.0, None

  def generate_abuse_score_chart(dates, scores, patterns):
+ """Generate a timeline chart of abuse scores"""
+ try:
+ plt.figure(figsize=(10, 6))
+ plt.clf()
+
+ # Create new figure
+ fig, ax = plt.subplots(figsize=(10, 6))
+
+ # Plot points and lines
+ x = range(len(scores))
+ plt.plot(x, scores, 'bo-', linewidth=2, markersize=8)
+
+ # Add labels for each point
+ for i, (score, pattern) in enumerate(zip(scores, patterns)):
+ plt.annotate(
+ f'{pattern}\n{score:.0f}%',
+ (i, score),
+ textcoords="offset points",
+ xytext=(0, 10),
+ ha='center',
+ bbox=dict(
+ boxstyle='round,pad=0.5',
+ fc='white',
+ ec='gray',
+ alpha=0.8
+ )
 )
+
+ # Customize the plot
+ plt.ylim(-5, 105)
+ plt.grid(True, linestyle='--', alpha=0.7)
+ plt.title('Abuse Pattern Timeline', pad=20, fontsize=12)
+ plt.ylabel('Abuse Score %')
+
+ # X-axis labels
+ plt.xticks(x, dates, rotation=45)
+
+ # Risk level bands
+ plt.axhspan(0, 50, color='#90EE90', alpha=0.2) # light green
+ plt.axhspan(50, 70, color='#FFD700', alpha=0.2) # gold
+ plt.axhspan(70, 85, color='#FFA500', alpha=0.2) # orange
+ plt.axhspan(85, 100, color='#FF6B6B', alpha=0.2) # light red
+
+ # Add risk level labels
+ plt.text(-0.2, 25, 'Low Risk', rotation=90, va='center')
+ plt.text(-0.2, 60, 'Moderate Risk', rotation=90, va='center')
+ plt.text(-0.2, 77.5, 'High Risk', rotation=90, va='center')
+ plt.text(-0.2, 92.5, 'Critical Risk', rotation=90, va='center')
+
+ # Adjust layout
+ plt.tight_layout()
+
+ # Convert plot to image
+ buf = io.BytesIO()
+ plt.savefig(buf, format='png', bbox_inches='tight')
+ buf.seek(0)
+ plt.close('all') # Close all figures to prevent memory leaks
+
+ return Image.open(buf)
+ except Exception as e:
+ logger.error(f"Error generating abuse score chart: {e}")
+ return None

  def analyze_composite(msg1, msg2, msg3, *answers_and_none):
+ """Analyze multiple messages and checklist responses"""
+ try:
+ # Process checklist responses
+ none_selected_checked = answers_and_none[-1]
+ responses_checked = any(answers_and_none[:-1])
+ none_selected = not responses_checked and none_selected_checked
+
+ # Determine escalation score
+ if none_selected:
+ escalation_score = 0
+ escalation_note = "Checklist completed: no danger items reported."
+ escalation_completed = True
+ elif responses_checked:
+ escalation_score = sum(w for (_, w), a in zip(ESCALATION_QUESTIONS, answers_and_none[:-1]) if a)
+ escalation_note = "Checklist completed."
+ escalation_completed = True
+ else:
+ escalation_score = None
+ escalation_note = "Checklist not completed."
+ escalation_completed = False
+
+ # Process messages
+ messages = [msg1, msg2, msg3]
+ active = [(m, f"Message {i+1}") for i, m in enumerate(messages) if m.strip()]
+ if not active:
+ return "Please enter at least one message.", None
+
+ # Detect threats
+ def normalize(text):
+ import unicodedata
+ text = text.lower().strip()
+ text = unicodedata.normalize("NFKD", text)
+ text = text.replace("’", "'") # smart quote to straight
+ return re.sub(r"[^a-z0-9 ]", "", text)
+
+ def detect_threat_motifs(message, motif_list):
+ norm_msg = normalize(message)
+ return [motif for motif in motif_list if normalize(motif) in norm_msg]
+
+ # Analyze threats and patterns
+ immediate_threats = [detect_threat_motifs(m, THREAT_MOTIFS) for m, _ in active]
+ flat_threats = [t for sublist in immediate_threats for t in sublist]
+ threat_risk = "Yes" if flat_threats else "No"
+
+ # Analyze each message
+ results = [(analyze_single_message(m, THRESHOLDS.copy()), d) for m, d in active]
+
+ # Extract scores and metadata
+ abuse_scores = [r[0][0] for r in results]
+ stages = [r[0][4] for r in results]
+ darvo_scores = [r[0][5] for r in results]
+ tone_tags = [r[0][6] for r in results]
+ dates_used = [r[1] for r in results]
+
+ # Analyze patterns
+ predicted_labels = [label for r in results for label in r[0][1]]
+ high = {'control'}
+ moderate = {'gaslighting', 'dismissiveness', 'obscure language', 'insults',
+ 'contradictory statements', 'guilt tripping'}
+ low = {'blame shifting', 'projection', 'recovery phase'}
+
+ counts = {'high': 0, 'moderate': 0, 'low': 0}
+ for label in predicted_labels:
+ if label in high:
+ counts['high'] += 1
+ elif label in moderate:
+ counts['moderate'] += 1
+ elif label in low:
+ counts['low'] += 1
+
+ # Determine pattern escalation risk
+ if counts['high'] >= 2 and counts['moderate'] >= 2:
+ pattern_escalation_risk = "Critical"
+ elif (counts['high'] >= 2 and counts['moderate'] >= 1) or \
+ (counts['moderate'] >= 3) or \
+ (counts['high'] >= 1 and counts['moderate'] >= 2):
+ pattern_escalation_risk = "High"
+ elif (counts['moderate'] == 2) or \
+ (counts['high'] == 1 and counts['moderate'] == 1) or \
+ (counts['moderate'] == 1 and counts['low'] >= 2) or \
+ (counts['high'] == 1 and sum(counts.values()) == 1):
+ pattern_escalation_risk = "Moderate"
+ else:
+ pattern_escalation_risk = "Low"
+
+ # Calculate escalation risk
+ checklist_escalation_risk = "Unknown" if escalation_score is None else (
+ "Critical" if escalation_score >= 20 else
+ "Moderate" if escalation_score >= 10 else
+ "Low"
 )
+
+ # Calculate escalation bump
+ escalation_bump = 0
+ for result, _ in results:
+ abuse_score, _, _, sentiment, stage, darvo_score, tone_tag = result
+ if darvo_score > 0.65:
+ escalation_bump += 3
+ if tone_tag in ["forced accountability flip", "emotional threat"]:
+ escalation_bump += 2
+ if abuse_score > 80:
+ escalation_bump += 2
+ if stage == 2:
+ escalation_bump += 3
+
+ # Calculate combined risk
+ def rank(label):
+ return {"Low": 0, "Moderate": 1, "High": 2, "Critical": 3, "Unknown": 0}.get(label, 0)
+
+ combined_score = rank(pattern_escalation_risk) + rank(checklist_escalation_risk) + escalation_bump
+ escalation_risk = (
+ "Critical" if combined_score >= 6 else
+ "High" if combined_score >= 4 else
+ "Moderate" if combined_score >= 2 else
+ "Low"
 )
+
+ # Build escalation text
+ if escalation_score is None:
+ escalation_text = (
+ "🚫 **Escalation Potential: Unknown** (Checklist not completed)\n"
+ "⚠️ This section was not completed. Escalation potential is estimated using message data only.\n"
+ )
+ hybrid_score = 0
+ elif escalation_score == 0:
+ escalation_text = (
+ "✅ **Escalation Checklist Completed:** No danger items reported.\n"
+ "🧭 **Escalation potential estimated from detected message patterns only.**\n"
+ f"• Pattern Risk: {pattern_escalation_risk}\n"
+ f"• Checklist Risk: None reported\n"
+ f"• Escalation Bump: +{escalation_bump} (from DARVO, tone, intensity, etc.)"
+ )
+ hybrid_score = escalation_bump
+ else:
+ hybrid_score = escalation_score + escalation_bump
+ escalation_text = (
+ f"📈 **Escalation Potential: {escalation_risk} ({hybrid_score}/29)**\n"
+ "📋 This score combines your safety checklist answers *and* detected high-risk behavior.\n"
+ f"• Pattern Risk: {pattern_escalation_risk}\n"
+ f"• Checklist Risk: {checklist_escalation_risk}\n"
+ f"• Escalation Bump: +{escalation_bump} (from DARVO, tone, intensity, etc.)"
+ )
+
+ # Calculate composite abuse score
+ composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
+
+ # Get most common stage
+ most_common_stage = max(set(stages), key=stages.count)
+ stage_text = RISK_STAGE_LABELS[most_common_stage]
+
+ # Get top labels
+ top_labels = []
+ for result, _ in results:
+ threshold_labels = result[1]
+ if threshold_labels:
+ top_labels.append(threshold_labels[0])
+ else:
+ top_labels.append("none")
+
+ # Calculate average DARVO score
+ avg_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
+ darvo_blurb = ""
+ if avg_darvo > 0.25:
+ level = "moderate" if avg_darvo < 0.65 else "high"
+ darvo_blurb = f"\n\n🎭 **DARVO Score: {avg_darvo}** → This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
+
+ # Build output text
+ out = f"Abuse Intensity: {composite_abuse}%\n"
+ out += "📊 This reflects the strength and severity of detected abuse patterns in the message(s).\n\n"
+ out += generate_risk_snippet(composite_abuse, top_labels[0], hybrid_score, most_common_stage)
+ out += f"\n\n{stage_text}"
+ out += darvo_blurb
+ out += "\n\n🎭 **Emotional Tones Detected:**\n"
+ for i, tone in enumerate(tone_tags):
+ out += f"• Message {i+1}: *{tone or 'none'}*\n"
+
+ # Add threat section
+ if flat_threats:
+ out += "\n\n🚨 **Immediate Danger Threats Detected:**\n"
+ for t in set(flat_threats):
+ out += f"• \"{t}\"\n"
+ out += "\n⚠️ These phrases may indicate an imminent risk to physical safety."
+ else:
+ out += "\n\n🧩 **Immediate Danger Threats:** None explicitly detected.\n"
+ out += "This does *not* rule out risk, but no direct threat phrases were matched."
+
+ # Generate timeline
+ pattern_labels = [
+ pats[0][0] if (pats := r[0][2]) else "none"
+ for r in results
+ ]
+ timeline_image = generate_abuse_score_chart(dates_used, abuse_scores, pattern_labels)
+
+ # Add escalation text
+ out += "\n\n" + escalation_text
+ return out, timeline_image
+
+ except Exception as e:
+ logger.error(f"Error in analyze_composite: {e}")
+ return "An error occurred during analysis.", None
+
+ # Gradio Interface Setup
+ def create_interface():
+ try:
+ textbox_inputs = [gr.Textbox(label=f"Message {i+1}") for i in range(3)]
+ quiz_boxes = [gr.Checkbox(label=q) for q, _ in ESCALATION_QUESTIONS]
+ none_box = gr.Checkbox(label="None of the above")
+
+ demo = gr.Interface(
+ fn=analyze_composite,
+ inputs=textbox_inputs + quiz_boxes + [none_box],
+ outputs=[
+ gr.Textbox(label="Results"),
+ gr.Image(label="Abuse Score Timeline", type="pil")
+ ],
+ title="Abuse Pattern Detector + Escalation Quiz",
+ description=(
+ "Enter up to three messages that concern you. "
+ "For the most accurate results, include messages from a recent emotionally intense period."
+ ),
+ flagging_mode="manual"
+ )
+ return demo
+ except Exception as e:
+ logger.error(f"Error creating interface: {e}")
+ raise
+
+ # Main execution
+ if __name__ == "__main__":
+ try:
+ demo = create_interface()
+ demo.launch(share=True)
+ except Exception as e:
+ logger.error(f"Failed to launch app: {e}")
+ raise
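
One gap worth noting: the new analyze_composite still calls generate_risk_snippet, but both copies of that function are deleted above and the new listing never re-adds it, so the call would raise a NameError unless the definition survives in context this diff view elides. A minimal sketch of what would need to exist, reconstructed from the deleted implementation (the per-pattern severity listing is dropped here, since the caller now passes a single label string rather than a list):

def generate_risk_snippet(abuse_score, patterns, hybrid_score, stage):
    """Risk summary text; sketch reconstructed from the deleted version."""
    # Risk level from abuse score and combined escalation score.
    if abuse_score >= 85 or hybrid_score >= 20:
        risk_level = "Critical"
    elif abuse_score >= 70 or hybrid_score >= 15:
        risk_level = "High"
    elif abuse_score >= 50 or hybrid_score >= 10:
        risk_level = "Moderate"
    else:
        risk_level = "Low"

    descriptions = {
        "Critical": f"🚨 **Risk Level: Critical**\nMultiple severe abuse patterns detected (Score: {abuse_score:.1f}%).",
        "High": f"⚠️ **Risk Level: High**\nStrong abuse patterns detected (Score: {abuse_score:.1f}%).",
        "Moderate": f"⚡ **Risk Level: Moderate**\nConcerning patterns detected (Score: {abuse_score:.1f}%).",
        "Low": f"📝 **Risk Level: Low**\nMinor concerning patterns detected (Score: {abuse_score:.1f}%).",
    }
    stage_context = {
        1: "Current patterns suggest a tension-building phase.",
        2: "Messages show signs of active escalation.",
        3: "Patterns indicate attempted reconciliation without real change.",
        4: "Surface calm may mask underlying issues.",
    }

    output = descriptions[risk_level]
    if stage in stage_context:
        output += f"\n{stage_context[stage]}"
    if risk_level in ["Critical", "High"]:
        output += "\n\n⚠️ **Safety Recommendations:**"
        output += "\n• Consider reaching out to a domestic violence hotline"
        output += "\n• Document all concerning interactions"
        output += "\n• Have a safety plan in place"
    return output

Restoring the fuller deleted version would also work, provided the caller passes a list of patterns instead of top_labels[0].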