SamanthaStorm committed on
Commit
cf7ed0f
·
verified ·
1 Parent(s): aee2dc7

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -691
app.py DELETED
@@ -1,691 +0,0 @@
1
- import gradio as gr
2
- import torch
3
- import numpy as np
4
- from transformers import pipeline, RobertaForSequenceClassification, RobertaTokenizer
5
- from motif_tagging import detect_motifs
6
- import re
7
- import matplotlib.pyplot as plt
8
- import io
9
- from PIL import Image
10
- from datetime import datetime
11
- from transformers import pipeline as hf_pipeline # prevent name collision with gradio pipeline
12
-
13
def get_emotion_profile(text):
    """Classify *text* with the emotion pipeline and return ``{label: score}``.

    The pipeline result may be a flat list of ``{"label", "score"}`` dicts or
    that same list wrapped in an outer list; both shapes are accepted.
    Labels are lower-cased and scores rounded to three decimals.
    """
    raw = emotion_pipeline(text)
    is_nested = isinstance(raw, list) and isinstance(raw[0], list)
    entries = raw[0] if is_nested else raw

    profile = {}
    for entry in entries:
        profile[entry['label'].lower()] = round(entry['score'], 3)
    return profile
18
# Emotion model (no retraining needed): an off-the-shelf checkpoint used
# as-is by get_emotion_profile(). The options are collected in one place so
# the pipeline call itself stays a one-liner.
_EMOTION_PIPELINE_OPTIONS = {
    "model": "j-hartmann/emotion-english-distilroberta-base",
    "top_k": None,
    "truncation": True,
}
emotion_pipeline = hf_pipeline("text-classification", **_EMOTION_PIPELINE_OPTIONS)
25
-
26
- # --- Timeline Visualization Function ---
27
def _parse_iso_dates(dates):
    """Return ``dates`` parsed as datetimes, or ``None`` if any entry is not a valid ``YYYY-MM-DD``."""
    if not all(re.fullmatch(r"\d{4}-\d{2}-\d{2}", d) for d in dates):
        return None
    try:
        return [datetime.strptime(d, "%Y-%m-%d") for d in dates]
    except ValueError:
        # Right shape but not a real calendar date (e.g. month 13).
        return None


def generate_abuse_score_chart(dates, scores, labels):
    """Render the abuse scores as a line chart and return it as a PIL image.

    Args:
        dates: One x-axis entry per score. When every entry is a valid,
            complete ``YYYY-MM-DD`` string the x positions are real dates;
            otherwise the points are placed at 1..N and labelled
            "Message 1".."Message N".
        scores: Numeric scores, one per entry in ``dates``. Each point is
            annotated with its integer percentage; the y-axis is fixed to
            0-105.
        labels: Accepted for interface compatibility; not used.

    Returns:
        A ``PIL.Image.Image`` opened on an in-memory PNG of the chart.
    """
    parsed_x = _parse_iso_dates(dates)
    if parsed_x is not None:
        x_labels = [d.strftime("%Y-%m-%d") for d in parsed_x]
    else:
        parsed_x = list(range(1, len(dates) + 1))
        x_labels = [f"Message {i}" for i in parsed_x]

    fig, ax = plt.subplots(figsize=(8, 3))
    try:
        ax.plot(parsed_x, scores, marker='o', linestyle='-', color='darkred', linewidth=2)

        # Print the value just above each point.
        for x, y in zip(parsed_x, scores):
            ax.text(x, y + 2, f"{int(y)}%", ha='center', fontsize=8, color='black')

        ax.set_xticks(parsed_x)
        ax.set_xticklabels(x_labels)
        ax.set_xlabel("")  # No axis label
        ax.set_ylabel("Abuse Score (%)")
        ax.set_ylim(0, 105)
        ax.grid(True)
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each request would leave one more figure in memory.
        plt.close(fig)

    buf.seek(0)
    return Image.open(buf)
60
-
61
-
62
- # --- Abuse Model ---
63
- from transformers import AutoModelForSequenceClassification, AutoTokenizer
64
-
65
# Abuse-pattern classifier checkpoint. The tokenizer and the model are loaded
# from the same repository; the slow (non-fast) tokenizer is requested
# explicitly.
model_name = "SamanthaStorm/tether-multilabel-v3"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
68
-
69
# Classifier labels. analyze_single_message() zips this list positionally
# against the model's per-label scores, so the order here is significant.
LABELS = [
    "recovery",
    "control",
    "gaslighting",
    "guilt tripping",
    "dismissiveness",
    "blame shifting",
    "nonabusive",
    "projection",
    "insults",
    "contradictory statements",
    "obscure language",
]

# Minimum sigmoid score for a label to count as detected. The comparison is
# strict (score > threshold), so the 1.0 on "nonabusive" means that label
# can never be reported as a detected pattern.
THRESHOLDS = {
    "recovery": 0.4,
    "control": 0.45,
    "gaslighting": 0.25,
    "guilt tripping": 0.2,
    "dismissiveness": 0.25,
    "blame shifting": 0.25,
    "projection": 0.25,
    "insults": 0.05,
    "contradictory statements": 0.25,
    "obscure language": 0.25,
    "nonabusive": 1.0,
}

# Relative weight of each label in the weighted-average abuse score.
PATTERN_WEIGHTS = {
    "recovery": 0.7,
    "control": 1.4,
    "gaslighting": 1.5,
    "guilt tripping": 1.2,
    "dismissiveness": 0.9,
    "blame shifting": 0.8,
    "projection": 0.5,
    "insults": 1.4,
    "contradictory statements": 1.0,
    "obscure language": 0.9,
    "nonabusive": 0.0,
}

# Escalation risk tier per pattern. The classifier label is "recovery";
# "recovery phase" is the older spelling and is kept so existing lookups
# keep working.
ESCALATION_RISKS = {
    "blame shifting": "low",
    "contradictory statements": "moderate",
    "control": "high",
    "dismissiveness": "moderate",
    "gaslighting": "moderate",
    "guilt tripping": "moderate",
    "insults": "moderate",
    "obscure language": "low",
    "projection": "low",
    "recovery": "low",
    "recovery phase": "low",
}

# User-facing description for each risk stage number (see get_risk_stage).
RISK_STAGE_LABELS = {
    1: "🌀 Risk Stage: Tension-Building\nThis message reflects rising emotional pressure or subtle control attempts.",
    2: "🔥 Risk Stage: Escalation\nThis message includes direct or aggressive patterns, suggesting active harm.",
    3: "🌧️ Risk Stage: Reconciliation\nThis message reflects a reset attempt—apologies or emotional repair without accountability.",
    4: "🌸 Risk Stage: Calm / Honeymoon\nThis message appears supportive but may follow prior harm, minimizing it.",
}

# Safety checklist: (checkbox label, weight). The weights of the ticked items
# are summed into the checklist escalation score; the maximum is 29.
ESCALATION_QUESTIONS = [
    ("Partner has access to firearms or weapons", 4),
    ("Partner threatened to kill you", 3),
    ("Partner threatened you with a weapon", 3),
    ("Partner has ever choked you, even if you considered it consensual at the time", 4),
    ("Partner injured or threatened your pet(s)", 3),
    ("Partner has broken your things, punched or kicked walls, or thrown things ", 2),
    ("Partner forced or coerced you into unwanted sexual acts", 3),
    ("Partner threatened to take away your children", 2),
    ("Violence has increased in frequency or severity", 3),
    ("Partner monitors your calls/GPS/social media", 2),
]
133
def get_emotional_tone_tag(emotions, sentiment, patterns, abuse_score):
    """Map emotion scores, sentiment and detected patterns to a tone tag.

    The rules are evaluated top to bottom and the first match wins, so their
    order is part of the behaviour.

    Args:
        emotions: Mapping of lower-case emotion name to score; missing
            emotions count as 0.
        sentiment: ``"undermining"`` or ``"supportive"``.
        patterns: Iterable of detected pattern labels. ``"recovery"`` (the
            classifier's label) and ``"recovery phase"`` are treated as the
            same pattern.
        abuse_score: Abuse score on the 0-100 scale; only used by the
            "performative regret" rule.

    Returns:
        The tag string of the first matching rule, or ``None``.
    """
    sadness = emotions.get("sadness", 0)
    joy = emotions.get("joy", 0)
    neutral = emotions.get("neutral", 0)
    disgust = emotions.get("disgust", 0)
    anger = emotions.get("anger", 0)
    fear = emotions.get("fear", 0)

    # The rules below name "recovery phase", but the classifier emits
    # "recovery"; alias one to the other so those rules can actually fire.
    recovery_names = ("recovery", "recovery phase")
    present = set(patterns)
    if "recovery" in present:
        present.add("recovery phase")

    undermining = sentiment == "undermining"

    def has_any(*names):
        return any(name in present for name in names)

    # 1. Performative Regret
    if (
        sadness > 0.4
        and has_any("blame shifting", "guilt tripping", "recovery phase")
        and (undermining or abuse_score > 40)
    ):
        return "performative regret"

    # 2. Coercive Warmth
    if (joy > 0.3 or sadness > 0.4) and has_any("control", "gaslighting") and undermining:
        return "coercive warmth"

    # 3. Cold Invalidation
    if (
        (neutral + disgust) > 0.5
        and has_any("dismissiveness", "projection", "obscure language")
        and undermining
    ):
        return "cold invalidation"

    # 4. Genuine Vulnerability: supportive tone with no pattern other than
    # recovery (an empty pattern list also qualifies).
    if (
        (sadness + fear) > 0.5
        and sentiment == "supportive"
        and all(p in recovery_names for p in patterns)
    ):
        return "genuine vulnerability"

    # 5. Emotional Threat
    if (
        (anger + disgust) > 0.5
        and has_any("control", "insults", "dismissiveness")
        and undermining
    ):
        return "emotional threat"

    # 6. Weaponized Sadness
    if sadness > 0.6 and has_any("guilt tripping", "projection") and undermining:
        return "weaponized sadness"

    # 7. Toxic Resignation
    if neutral > 0.5 and has_any("dismissiveness", "obscure language") and undermining:
        return "toxic resignation"

    # 8. Aggressive Dismissal
    if anger > 0.5 and has_any("aggression", "insults", "control") and undermining:
        return "aggressive dismissal"

    # 9. Deflective Hostility
    if (
        (0.2 < anger < 0.7 or 0.2 < disgust < 0.7)
        and has_any("deflection", "projection")
        and undermining
    ):
        return "deflective hostility"

    # 10. Mocking Detachment
    if (neutral + joy) > 0.5 and has_any("mockery", "insults", "projection") and undermining:
        return "mocking detachment"

    # 11. Contradictory Gaslight
    if (
        (joy + anger + sadness) > 0.5
        and has_any("gaslighting", "contradictory statements")
        and undermining
    ):
        return "contradictory gaslight"

    # 12. Calculated Neutrality
    if (
        neutral > 0.6
        and has_any("obscure language", "deflection", "dismissiveness")
        and undermining
    ):
        return "calculated neutrality"

    # 13. Forced Accountability Flip
    if (
        (anger + disgust) > 0.5
        and has_any("blame shifting", "manipulation", "projection")
        and undermining
    ):
        return "forced accountability flip"

    # 14. Conditional Affection
    if joy > 0.4 and has_any("apology baiting", "control", "recovery phase") and undermining:
        return "conditional affection"

    # Second "forced accountability flip" rule. It only adds "deflection" to
    # rule 13 but is deliberately kept after rule 14 to preserve precedence.
    if (
        (anger + disgust) > 0.5
        and has_any("blame shifting", "projection", "deflection")
        and undermining
    ):
        return "forced accountability flip"

    # Emotional Instability Fallback
    if (anger + sadness + disgust) > 0.6 and undermining:
        return "emotional instability"

    return None
262
- # 🔄 New DARVO score model (regression-based)
263
- from torch.nn.functional import sigmoid
264
- import torch
265
-
266
# DARVO regressor, fetched from the Hugging Face Hub. The tokenizer and the
# model come from the same repository; the model is switched to eval mode
# right after loading.
_DARVO_MODEL_ID = "SamanthaStorm/tether-darvo-regressor-v1"
darvo_tokenizer = AutoTokenizer.from_pretrained(_DARVO_MODEL_ID)
darvo_model = AutoModelForSequenceClassification.from_pretrained(_DARVO_MODEL_ID)
darvo_model.eval()
270
-
271
def predict_darvo_score(text):
    """Return the DARVO model's sigmoid-squashed output for *text*.

    The text is tokenized with truncation, run through ``darvo_model``
    without gradient tracking, and the single resulting value is rounded to
    four decimals for display.
    """
    encoded = darvo_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        prediction = darvo_model(**encoded)
        squashed = sigmoid(prediction.logits).item()
    return round(squashed, 4)  # Rounded for display/output
277
_WEAPON_KEYWORDS = (
    "knife", "knives", "stab", "cut you", "cutting",
    "gun", "shoot", "rifle", "firearm", "pistol",
    "bomb", "blow up", "grenade", "explode",
    "weapon", "armed", "loaded", "kill you", "take you out",
)

# Keywords must begin at a word boundary. Inflections ("stabbed", "guns",
# "shooting") still match because only the start is anchored, but keywords
# buried inside unrelated words ("begun", "harmed", "downloaded") do not.
_WEAPON_PATTERN = re.compile(
    r"\b(?:" + "|".join(re.escape(keyword) for keyword in _WEAPON_KEYWORDS) + ")"
)


def detect_weapon_language(text):
    """Return ``True`` if *text* mentions a weapon or weapon-related threat.

    Matching is case-insensitive and requires a keyword to start at a word
    boundary, so a hit is always a word (or phrase) beginning with one of
    the keywords.
    """
    return _WEAPON_PATTERN.search(text.lower()) is not None
286
def get_risk_stage(patterns, sentiment):
    """Return the risk stage number (1-4) for the detected patterns.

    Checks are made in priority order and the first match wins:

    * 2 (escalation) when "insults" is present;
    * 3 (reconciliation) when a recovery pattern is present;
    * 1 (tension-building) when "control" or "guilt tripping" is present;
    * 4 (calm / honeymoon) when the sentiment is "supportive" and
      "projection" or "dismissiveness" is present;
    * 1 otherwise.

    Args:
        patterns: Collection of detected pattern labels.
        sentiment: ``"supportive"`` or ``"undermining"``.
    """
    if "insults" in patterns:
        return 2
    # The classifier emits "recovery"; "recovery phase" is the older
    # spelling. Checking only the latter made stage 3 unreachable.
    if "recovery" in patterns or "recovery phase" in patterns:
        return 3
    if "control" in patterns or "guilt tripping" in patterns:
        return 1
    if sentiment == "supportive" and any(
        p in patterns for p in ("projection", "dismissiveness")
    ):
        return 4
    return 1
296
-
297
# Explanation shown under "Why this might be flagged", keyed by lower-case
# pattern label. "default" is used for any label without its own entry.
_WHY_FLAGGED = {
    "control": "This message may reflect efforts to restrict someone’s autonomy, even if it's framed as concern or care.",
    "gaslighting": "This message could be manipulating someone into questioning their perception or feelings.",
    "dismissiveness": "This message may include belittling, invalidating, or ignoring the other person’s experience.",
    "insults": "Direct insults often appear in escalating abusive dynamics and can erode emotional safety.",
    "blame shifting": "This message may redirect responsibility to avoid accountability, especially during conflict.",
    "guilt tripping": "This message may induce guilt in order to control or manipulate behavior.",
    "recovery phase": "This message may be part of a tension-reset cycle, appearing kind but avoiding change.",
    "projection": "This message may involve attributing the abuser’s own behaviors to the victim.",
    "contradictory statements": "This message may contain internal contradictions used to confuse, destabilize, or deflect responsibility.",
    "obscure language": "This message may use overly formal, vague, or complex language to obscure meaning or avoid accountability.",
    "default": "This message contains language patterns that may affect safety, clarity, or emotional autonomy.",
}


def generate_risk_snippet(abuse_score, top_label, escalation_score, stage):
    """Build the markdown risk summary for the top detected pattern.

    Args:
        abuse_score: Abuse score on the 0-100 scale.
        top_label: Pattern label, optionally in the form "label – score".
            ``None`` is rendered as "Unknown"; other non-string values are
            rendered with ``str()``.
        escalation_score: Numeric escalation score.
        stage: Risk stage number; stage 2 raises a 40+ abuse score to
            "moderate".

    Returns:
        The formatted summary text.
    """
    label_text = top_label if isinstance(top_label, str) else None

    # An "aggression" label may carry a percentage, e.g. "aggression (30%)".
    aggression_score = 0
    if label_text is not None and "aggression" in label_text.lower():
        match = re.search(r"\(?(\d+)\%?\)?", label_text)
        if match:
            aggression_score = int(match.group(1)) / 100

    # Revised risk logic
    if abuse_score >= 85 or escalation_score >= 16:
        risk_level = "high"
    elif abuse_score >= 60 or escalation_score >= 8 or aggression_score >= 0.25:
        risk_level = "moderate"
    elif stage == 2 and abuse_score >= 40:
        risk_level = "moderate"
    else:
        risk_level = "low"

    if label_text is not None and " – " in label_text:
        # partition() splits on the first separator only, so a label with
        # several separators no longer breaks the unpacking.
        pattern_label, _, pattern_score = label_text.partition(" – ")
    else:
        pattern_label = str(top_label) if top_label is not None else "Unknown"
        pattern_score = ""

    # The classifier's label is "recovery"; its explanation is stored under
    # the older "recovery phase" key.
    lookup_key = pattern_label.lower()
    if lookup_key == "recovery":
        lookup_key = "recovery phase"
    explanation = _WHY_FLAGGED.get(lookup_key, _WHY_FLAGGED["default"])

    base = f"\n\n🛑 Risk Level: {risk_level.capitalize()}\n"
    base += f"This message shows strong indicators of **{pattern_label}**. "

    if risk_level == "high":
        base += "The language may reflect patterns of emotional control, even when expressed in soft or caring terms.\n"
    elif risk_level == "moderate":
        base += "There are signs of emotional pressure or verbal aggression that may escalate if repeated.\n"
    else:
        base += "The message does not strongly indicate abuse, but it's important to monitor for patterns.\n"

    base += f"\n💡 *Why this might be flagged:*\n{explanation}\n"
    base += f"\nDetected Pattern: **{pattern_label} ({pattern_score})**\n"
    base += "🧠 You can review the pattern in context. This tool highlights possible dynamics—not judgments."
    return base
356
-
357
-
358
# --- Step X: Detect Immediate Danger Threats ---
# Lower-case phrases scanned for in each message. Apostrophes are a mix of
# straight and curly on purpose-agnostic grounds: the matcher strips
# punctuation from both the phrase and the message before comparing.
# The section comments below are only a rough reading aid; order is
# unchanged.
THREAT_MOTIFS = [
    # physical violence
    "i'll kill you",
    "i’m going to hurt you",
    "you’re dead",
    "you won't survive this",
    "i’ll break your face",
    "i'll bash your head in",
    "i’ll snap your neck",
    "i’ll come over there and make you shut up",
    "i'll knock your teeth out",
    "you’re going to bleed",
    "you want me to hit you?",
    "i won’t hold back next time",
    "i swear to god i’ll beat you",
    "next time, i won’t miss",
    "i’ll make you scream",
    # stalking / surveillance
    "i know where you live",
    "i'm outside",
    "i’ll be waiting",
    "i saw you with him",
    "you can’t hide from me",
    "i’m coming to get you",
    "i'll find you",
    "i know your schedule",
    "i watched you leave",
    "i followed you home",
    # retaliation / intimidation
    "you'll regret this",
    "you’ll be sorry",
    "you’re going to wish you hadn’t",
    "you brought this on yourself",
    "don’t push me",
    "you have no idea what i’m capable of",
    "you better watch yourself",
    "i don’t care what happens to you anymore",
    "i’ll make you suffer",
    "you’ll pay for this",
    "i’ll never let you go",
    "you’re nothing without me",
    "if you leave me, i’ll kill myself",
    # reputation
    "i'll ruin you",
    "i'll tell everyone what you did",
    "i’ll make sure everyone knows",
    "i’m going to destroy your name",
    "you’ll lose everyone",
    "i’ll expose you",
    "your friends will hate you",
    "i’ll post everything",
    "you’ll be cancelled",
    # money / home / children
    "you’ll lose everything",
    "i’ll take the house",
    "i’ll drain your account",
    "you’ll never see a dime",
    "you’ll be broke when i’m done",
    "i’ll make sure you lose your job",
    "i’ll take your kids",
    "i’ll make sure you have nothing",
    "you can’t afford to leave me",
    # blame-framed threats
    "don't make me do this",
    "you know what happens when i’m mad",
    "you’re forcing my hand",
    "if you just behaved, this wouldn’t happen",
    "this is your fault",
    "you’re making me hurt you",
    "i warned you",
    "you should have listened",
]
382
-
383
-
384
# Minimum score reported when the given pattern is among the matches.
_PATTERN_FLOORS = {
    "control": 40,
    "gaslighting": 30,
    "insults": 25,
    "aggression": 40,
}


def compute_abuse_score(matched_scores, sentiment):
    """Combine matched pattern scores into one abuse score on a 0-100 scale.

    Args:
        matched_scores: Sequence of ``(label, score, weight)`` tuples.
        sentiment: ``"undermining"`` adds 10 points when the score is still
            below 50.

    Returns:
        ``0`` for an empty input, otherwise the adjusted score capped at 100.
    """
    if not matched_scores:
        return 0

    # Weighted average of passed patterns. When every weight is zero (for
    # example a lone "nonabusive" entry) there is nothing to average, so the
    # base is 0 instead of dividing by zero.
    weight_sum = sum(weight for _, _, weight in matched_scores)
    if weight_sum > 0:
        weighted_total = sum(score * weight for _, score, weight in matched_scores)
        base_score = (weighted_total / weight_sum) * 100
    else:
        base_score = 0

    # Boost for pattern count: +25% per additional pattern
    # (1.25x for 2, 1.5x for 3, 1.75x for 4, ...). The final cap bounds it.
    pattern_count = len(matched_scores)
    scale = 1.0 + 0.25 * max(0, pattern_count - 1)
    scaled_score = base_score * scale

    # Pattern floors: the highest floor among the matched labels applies.
    floor = max(_PATTERN_FLOORS.get(label, 0) for label, _, _ in matched_scores)
    adjusted_score = max(scaled_score, floor)

    # Sentiment tweak
    if sentiment == "undermining" and adjusted_score < 50:
        adjusted_score += 10

    return min(adjusted_score, 100)
413
-
414
-
415
def analyze_single_message(text, thresholds):
    """Run the emotion, abuse and DARVO models on one message.

    Args:
        text: The message to analyse.
        thresholds: Mapping of label to minimum sigmoid score. When the
            derived sentiment is "supportive" every threshold is raised by
            0.05 before it is applied.

    Returns:
        A 7-tuple ``(abuse_score, threshold_labels, top_patterns,
        {"label": sentiment}, stage, darvo_score, tone_tag)`` where
        ``threshold_labels`` are the labels that passed their threshold and
        ``top_patterns`` is a list of ``(label, score)`` pairs, highest
        first.
    """
    # The motif results are not used below; the call is kept so this
    # function does exactly what it did before as far as detect_motifs goes.
    _motif_hits, _matched_phrases = detect_motifs(text)

    # Get emotion profile
    emotion_profile = get_emotion_profile(text)
    sentiment_score = emotion_profile.get("anger", 0) + emotion_profile.get("disgust", 0)

    # Get model scores
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()

    # Sentiment override if neutral is high while critical thresholds are passed
    neutral_but_critical = emotion_profile.get("neutral", 0) > 0.85 and any(
        scores[LABELS.index(l)] > thresholds[l]
        for l in ["control", "blame shifting"]
    )
    if neutral_but_critical or sentiment_score > 0.25:
        sentiment = "undermining"
    else:
        sentiment = "supportive"

    weapon_flag = detect_weapon_language(text)

    adjusted_thresholds = {
        k: v + 0.05 if sentiment == "supportive" else v
        for k, v in thresholds.items()
    }
    darvo_score = predict_darvo_score(text)

    threshold_labels = [
        label for label, score in zip(LABELS, scores)
        if score > adjusted_thresholds[label]
    ]
    # Provisional tag (abuse score not known yet); only used for the
    # recovery check just below.
    tone_tag = get_emotional_tone_tag(emotion_profile, sentiment, threshold_labels, 0)

    top_patterns = sorted(zip(LABELS, scores), key=lambda x: x[1], reverse=True)[:2]

    # Post-threshold validation: strip recovery when the tone says the
    # "repair" is really a forced accountability flip.
    if "recovery" in threshold_labels and tone_tag == "forced accountability flip":
        threshold_labels.remove("recovery")
        top_patterns = [p for p in top_patterns if p[0] != "recovery"]
        print("⚠️ Removing 'recovery' due to undermining sentiment (not genuine repair)")

    matched_scores = [
        (label, score, PATTERN_WEIGHTS.get(label, 1.0))
        for label, score in zip(LABELS, scores)
        if score > adjusted_thresholds[label]
    ]

    abuse_score_raw = compute_abuse_score(matched_scores, sentiment)

    # Risk stage logic
    stage = get_risk_stage(threshold_labels, sentiment) if threshold_labels else 1
    if weapon_flag and stage < 2:
        stage = 2
    if weapon_flag:
        abuse_score_raw = min(abuse_score_raw + 25, 100)

    # Only a message with "control" detected may reach 100; others cap at 95.
    abuse_score = min(
        abuse_score_raw,
        100 if "control" in threshold_labels else 95
    )

    # Tag must happen after abuse score is finalized
    tone_tag = get_emotional_tone_tag(emotion_profile, sentiment, threshold_labels, abuse_score)

    # ---- Profanity + Anger Override Logic ----
    profane_words = {"fuck", "fucking", "bitch", "shit", "cunt", "ho", "asshole", "dick", "whore", "slut"}
    tokens = set(text.lower().split())
    has_profane = any(word in tokens for word in profane_words)

    # Profile keys are lower-cased by get_emotion_profile(); looking up
    # "Anger" always returned 0 and left this override unreachable.
    anger_score = emotion_profile.get("anger", 0)
    short_text = len(tokens) <= 10
    # NOTE(review): falls back to 0 when "insults" is not in the top two —
    # confirm whether the model's actual insults score should be used.
    insult_score = next((s for l, s in top_patterns if l == "insults"), 0)

    if has_profane and anger_score > 0.75 and short_text:
        print("⚠️ Profanity + Anger Override Triggered")
        if "insults" not in threshold_labels:
            threshold_labels.append("insults")
        # Force "insults" to the front, followed by the remaining patterns
        # from highest to lowest score.
        others = sorted(
            (p for p in top_patterns if p[0] != "insults"),
            key=lambda x: x[1],
            reverse=True,
        )
        top_patterns = [("insults", insult_score)] + others

    # Debug
    # NOTE(review): this logs the raw message text to stdout — confirm that
    # is acceptable for the deployment.
    print(f"Emotional Tone Tag: {tone_tag}")
    print("Emotion Profile:")
    for emotion, score in emotion_profile.items():
        print(f"  {emotion.capitalize():10}: {score}")
    print("\n--- Debug Info ---")
    print(f"Text: {text}")
    print(f"Sentiment (via emotion): {sentiment} (score: {round(sentiment_score, 3)})")
    print("Abuse Pattern Scores:")
    for label, score in zip(LABELS, scores):
        passed = "✅" if score > adjusted_thresholds[label] else "❌"
        print(f"  {label:25} → {score:.3f} {passed}")
    print(f"Matched for score: {[(l, round(s, 3)) for l, s, _ in matched_scores]}")
    print(f"Abuse Score Raw: {round(abuse_score_raw, 1)}")
    print("------------------\n")

    return abuse_score, threshold_labels, top_patterns, {"label": sentiment}, stage, darvo_score, tone_tag
523
-
524
def _normalize_threat_text(text):
    """Lower-case *text* and reduce it to ``a-z``, ``0-9`` and spaces."""
    import unicodedata

    text = text.lower().strip()
    text = unicodedata.normalize("NFKD", text)
    text = text.replace("’", "'")  # smart to straight
    # Everything that is not a lower-case letter, digit or space is dropped,
    # so punctuation differences never prevent a match.
    return re.sub(r"[^a-z0-9 ]", "", text)


def _detect_threat_motifs(message, motif_list):
    """Return the motifs whose normalized form occurs inside the normalized message."""
    norm_msg = _normalize_threat_text(message)
    return [
        motif for motif in motif_list
        if _normalize_threat_text(motif) in norm_msg
    ]


def _risk_rank(label):
    """Map a risk label to its numeric rank; unrecognised labels rank 0."""
    return {"Low": 0, "Moderate": 1, "High": 2, "Critical": 3, "Unknown": 0}.get(label, 0)


def analyze_composite(msg1, msg2, msg3, *answers_and_none):
    """Analyse up to three messages plus the safety checklist.

    Args:
        msg1, msg2, msg3: Message texts; blank or ``None`` entries are
            skipped.
        *answers_and_none: One truthy/falsy value per entry in
            ``ESCALATION_QUESTIONS``, followed by the "None of the above"
            checkbox value.

    Returns:
        ``(report_text, timeline_image)``. When no message was entered the
        report is a prompt to enter one and the image is ``None``.
    """
    messages = [msg1, msg2, msg3]
    active = [(m, f"Message {i+1}") for i, m in enumerate(messages) if m and m.strip()]
    if not active:
        # The interface has two outputs, so two values are returned here too.
        return "Please enter at least one message.", None

    checklist_answers = answers_and_none[:-1]
    none_selected_checked = answers_and_none[-1] if answers_and_none else False
    responses_checked = any(checklist_answers)
    none_selected = not responses_checked and none_selected_checked

    escalation_score = None
    if not none_selected:
        escalation_score = sum(
            w for (_, w), a in zip(ESCALATION_QUESTIONS, checklist_answers) if a
        )

    # Flag any threat phrases present in the messages
    immediate_threats = [_detect_threat_motifs(m, THREAT_MOTIFS) for m, _ in active]
    flat_threats = [t for sublist in immediate_threats for t in sublist]

    results = [(analyze_single_message(m, THRESHOLDS.copy()), d) for m, d in active]

    abuse_scores = [r[0][0] for r in results]
    stages = [r[0][4] for r in results]
    darvo_scores = [r[0][5] for r in results]
    tone_tags = [r[0][6] for r in results]
    dates_used = [r[1] for r in results]

    predicted_labels = [label for r in results for label, _ in r[0][2]]
    high = {'control'}
    moderate = {'gaslighting', 'dismissiveness', 'obscure language', 'insults', 'contradictory statements', 'guilt tripping'}
    # "recovery" is the label the classifier actually emits; "recovery phase"
    # is kept for the older spelling.
    low = {'blame shifting', 'projection', 'recovery', 'recovery phase'}
    counts = {'high': 0, 'moderate': 0, 'low': 0}
    for label in predicted_labels:
        if label in high:
            counts['high'] += 1
        elif label in moderate:
            counts['moderate'] += 1
        elif label in low:
            counts['low'] += 1

    # Pattern escalation logic
    pattern_escalation_risk = "Low"
    if counts['high'] >= 2 and counts['moderate'] >= 2:
        pattern_escalation_risk = "Critical"
    elif (counts['high'] >= 2 and counts['moderate'] >= 1) or (counts['moderate'] >= 3) or (counts['high'] >= 1 and counts['moderate'] >= 2):
        pattern_escalation_risk = "High"
    elif (counts['moderate'] == 2) or (counts['high'] == 1 and counts['moderate'] == 1) or (counts['moderate'] == 1 and counts['low'] >= 2) or (counts['high'] == 1 and sum(counts.values()) == 1):
        pattern_escalation_risk = "Moderate"

    checklist_escalation_risk = "Unknown" if escalation_score is None else (
        "Critical" if escalation_score >= 20 else
        "Moderate" if escalation_score >= 10 else
        "Low"
    )

    escalation_bump = 0
    for result, _ in results:
        abuse_score, _, _, sentiment, stage, darvo_score, tone_tag = result
        if darvo_score > 0.65:
            escalation_bump += 3
        if tone_tag in ["forced accountability flip", "emotional threat"]:
            escalation_bump += 2
        if abuse_score > 80:
            escalation_bump += 2
        if stage == 2:
            escalation_bump += 3

    combined_score = (
        _risk_rank(pattern_escalation_risk)
        + _risk_rank(checklist_escalation_risk)
        + escalation_bump
    )
    escalation_risk = (
        "Critical" if combined_score >= 6 else
        "High" if combined_score >= 4 else
        "Moderate" if combined_score >= 2 else
        "Low"
    )

    # NOTE(review): the source had lost its indentation; the breakdown lines
    # are assumed to belong to the completed-checklist branch only.
    if escalation_score is None:
        escalation_text = "🚫 **Escalation Potential: Unknown** (Checklist not completed)\n⚠️ This section was not completed. Escalation potential is estimated using message data only.\n"
        hybrid_score = 0
    else:
        # NOTE(review): 29 is the checklist maximum; the bump can push the
        # displayed numerator above it.
        hybrid_score = escalation_score + escalation_bump
        escalation_text = f"📈 **Escalation Potential: {escalation_risk} ({hybrid_score}/29)**\n"
        escalation_text += "📋 This score combines your safety checklist answers *and* detected high-risk behavior.\n"
        escalation_text += f"• Pattern Risk: {pattern_escalation_risk}\n"
        escalation_text += f"• Checklist Risk: {checklist_escalation_risk}\n"
        escalation_text += f"• Escalation Bump: +{escalation_bump} (from DARVO, tone, intensity, etc.)"

    # Composite Abuse Score: recomputed per message from its top patterns,
    # then averaged.
    composite_abuse_scores = []
    for result, _ in results:
        _, _, top_patterns, sentiment, _, _, _ = result
        matched_scores = [(label, score, PATTERN_WEIGHTS.get(label, 1.0)) for label, score in top_patterns]
        final_score = compute_abuse_score(matched_scores, sentiment["label"])
        composite_abuse_scores.append(final_score)
    composite_abuse = int(round(sum(composite_abuse_scores) / len(composite_abuse_scores)))

    most_common_stage = max(set(stages), key=stages.count)
    stage_text = RISK_STAGE_LABELS[most_common_stage]

    # Top label per message: first thresholded label, else the top pattern.
    top_labels = [r[0][1][0] if r[0][1] else r[0][2][0][0] for r in results]

    avg_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
    darvo_blurb = ""
    if avg_darvo > 0.25:
        level = "moderate" if avg_darvo < 0.65 else "high"
        darvo_blurb = f"\n\n🎭 **DARVO Score: {avg_darvo}** → This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."

    out = f"Abuse Intensity: {composite_abuse}%\n"
    out += "📊 This reflects the strength and severity of detected abuse patterns in the message(s).\n\n"
    out += generate_risk_snippet(composite_abuse, top_labels[0], hybrid_score, most_common_stage)
    out += f"\n\n{stage_text}"
    out += darvo_blurb
    out += "\n\n🎭 **Emotional Tones Detected:**\n"
    for i, tone in enumerate(tone_tags):
        out += f"• Message {i+1}: *{tone or 'none'}*\n"

    # --- Add Immediate Danger Threats section
    if flat_threats:
        out += "\n\n🚨 **Immediate Danger Threats Detected:**\n"
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # report is the same on every run.
        for t in dict.fromkeys(flat_threats):
            out += f"• \"{t}\"\n"
        out += "\n⚠️ These phrases may indicate an imminent risk to physical safety."
    else:
        out += "\n\n🧩 **Immediate Danger Threats:** None explicitly detected.\n"
        out += "This does *not* rule out risk, but no direct threat phrases were matched."

    pattern_labels = [r[0][2][0][0] for r in results]
    timeline_image = generate_abuse_score_chart(dates_used, abuse_scores, pattern_labels)
    out += "\n\n" + escalation_text

    return out, timeline_image
672
-
673
# --- Gradio UI wiring ---
# Input order matters: analyze_composite() receives the three messages
# first, then one checkbox per checklist question, then "None of the above".
textbox_inputs = [gr.Textbox(label=f"Message {number}") for number in range(1, 4)]
quiz_boxes = [gr.Checkbox(label=question) for question, _ in ESCALATION_QUESTIONS]
none_box = gr.Checkbox(label="None of the above")

_interface_inputs = textbox_inputs + quiz_boxes + [none_box]
_interface_outputs = [
    gr.Textbox(label="Results"),
    gr.Image(label="Abuse Score Timeline", type="pil"),
]

iface = gr.Interface(
    fn=analyze_composite,
    inputs=_interface_inputs,
    outputs=_interface_outputs,
    title="Abuse Pattern Detector + Escalation Quiz",
    description="Enter up to three messages that concern you. For the most accurate results, enter messages that happened during a recent time period that felt emotionally intense or 'off.'",
    allow_flagging="manual",
)

if __name__ == "__main__":
    iface.launch()
691
-