SamanthaStorm committed on
Commit
7df97d2
·
verified ·
1 Parent(s): 9784a67

Update app.py

Files changed (1)
  1. app.py +93 -134
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
 import spaces
 import torch
 import numpy as np
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline as hf_pipeline
 import re
 import matplotlib.pyplot as plt
 import io
@@ -11,41 +11,37 @@ from datetime import datetime
 from torch.nn.functional import sigmoid
 from collections import Counter
 import logging
-from transformers import pipeline as hf_pipeline
-
-# Add this with your other model loading code
-emotion_pipeline = hf_pipeline(
-    "text-classification",
-    model="j-hartmann/emotion-english-distilroberta-base",
-    top_k=6,
-    truncation=True,
-    device=0 if torch.cuda.is_available() else -1  # GPU support
-)
-
-# Add this after imports
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
 # Set up logging
 logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)
 
+# Device configuration
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+logger.info(f"Using device: {device}")
 
-
-# Model initialization with error handling
 # Model initialization
 model_name = "SamanthaStorm/tether-multilabel-v4"
 model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
 tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
 
-
 # Sentiment model
 sentiment_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-sentiment").to(device)
 sentiment_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-sentiment", use_fast=False)
 
+# Emotion pipeline
+emotion_pipeline = hf_pipeline(
+    "text-classification",
+    model="j-hartmann/emotion-english-distilroberta-base",
+    top_k=6,
+    truncation=True,
+    device=0 if torch.cuda.is_available() else -1
+)
+
 # DARVO model
 darvo_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-darvo-regressor-v1").to(device)
 darvo_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-darvo-regressor-v1", use_fast=False)
-
+darvo_model.eval()
 
 # Constants and Labels
 LABELS = [
@@ -54,12 +50,6 @@ LABELS = [
     "contradictory statements", "obscure language"
 ]
 
-TONE_LABELS = [
-    "cold invalidation", "coercive warmth", "contradictory gaslight",
-    "deflective hostility", "emotional instability", "nonabusive",
-    "performative regret", "emotional threat", "forced accountability flip"
-]
-
 SENTIMENT_LABELS = ["undermining", "supportive"]
 
 THRESHOLDS = {
@@ -89,7 +79,6 @@ PATTERN_WEIGHTS = {
     "obscure language": 0.9,
     "nonabusive": 0.0
 }
-
 ESCALATION_QUESTIONS = [
     ("Partner has access to firearms or weapons", 4),
    ("Partner threatened to kill you", 3),
@@ -110,7 +99,6 @@ RISK_STAGE_LABELS = {
     4: "🌸 Risk Stage: Calm / Honeymoon\nThis message appears supportive but may follow prior harm, minimizing it."
 }
 
-# Threat Motifs
 THREAT_MOTIFS = [
     "i'll kill you", "i'm going to hurt you", "you're dead", "you won't survive this",
     "i'll break your face", "i'll bash your head in", "i'll snap your neck",
@@ -134,14 +122,16 @@ THREAT_MOTIFS = [
     "if you just behaved, this wouldn't happen", "this is your fault",
     "you're making me hurt you", "i warned you", "you should have listened"
 ]
+
 def get_emotion_profile(text):
+    """Get emotion profile from text"""
     emotions = emotion_pipeline(text)
     if isinstance(emotions, list) and isinstance(emotions[0], list):
         emotions = emotions[0]
     return {e['label'].lower(): round(e['score'], 3) for e in emotions}
 
 def get_emotional_tone_tag(text, sentiment, patterns, abuse_score):
-    # Get emotions first
+    """Get emotional tone tag based on emotions and patterns"""
     emotions = get_emotion_profile(text)
 
     sadness = emotions.get("sadness", 0)
@@ -151,10 +141,10 @@ def get_emotional_tone_tag(text, sentiment, patterns, abuse_score):
     anger = emotions.get("anger", 0)
     fear = emotions.get("fear", 0)
 
-    # 1. Performative Regret
+    # 1. Performative Regret
     if (
         sadness > 0.4 and
-        any(p in patterns for p in ["blame shifting", "guilt tripping", "recovery phase"]) and
+        any(p in patterns for p in ["blame shifting", "guilt tripping", "recovery"]) and
         (sentiment == "undermining" or abuse_score > 40)
     ):
         return "performative regret"
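Note: "recovery phase" becomes "recovery" here, tracking the same rename in analyze_composite's low-severity set later in this diff (presumably matching the tether-multilabel-v4 label vocabulary). Since pattern checks are exact string membership, a stale name would silently never match:

# Membership is exact string comparison, so the old name stops matching:
patterns = ["recovery"]
any(p in patterns for p in ["blame shifting", "guilt tripping", "recovery phase"])  # False
any(p in patterns for p in ["blame shifting", "guilt tripping", "recovery"])        # True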
@@ -172,13 +162,14 @@ def get_emotional_tone_tag(text, sentiment, patterns, abuse_score):
         (neutral + disgust) > 0.5 and
         any(p in patterns for p in ["dismissiveness", "projection", "obscure language"]) and
         sentiment == "undermining"
-    ):return "cold invalidation"
+    ):
+        return "cold invalidation"
 
     # 4. Genuine Vulnerability
     if (
         (sadness + fear) > 0.5 and
         sentiment == "supportive" and
-        all(p in ["recovery phase"] for p in patterns)
+        all(p in ["recovery"] for p in patterns)
     ):
         return "genuine vulnerability"
 
@@ -205,59 +196,35 @@ def get_emotional_tone_tag(text, sentiment, patterns, abuse_score):
         sentiment == "undermining"
     ):
         return "toxic resignation"
-    # 8. Aggressive Dismissal
+
+    # 8. Aggressive Dismissal
     if (
         anger > 0.5 and
-        any(p in patterns for p in ["aggression", "insults", "control"]) and
+        any(p in patterns for p in ["insults", "control"]) and
         sentiment == "undermining"
     ):
         return "aggressive dismissal"
+
     # 9. Deflective Hostility
     if (
         (0.2 < anger < 0.7 or 0.2 < disgust < 0.7) and
-        any(p in patterns for p in ["deflection", "projection"]) and
-        sentiment == "undermining"
-    ):
-        return "deflective hostility"
-    # 10. Mocking Detachment
-    if (
-        (neutral + joy) > 0.5 and
-        any(p in patterns for p in ["mockery", "insults", "projection"]) and
+        any(p in patterns for p in ["projection"]) and
         sentiment == "undermining"
     ):
-        return "mocking detachment"
-    # 11. Contradictory Gaslight
+        return "deflective hostility"
+
+    # 10. Contradictory Gaslight
     if (
         (joy + anger + sadness) > 0.5 and
         any(p in patterns for p in ["gaslighting", "contradictory statements"]) and
         sentiment == "undermining"
     ):
         return "contradictory gaslight"
-    # 12. Calculated Neutrality
-    if (
-        neutral > 0.6 and
-        any(p in patterns for p in ["obscure language", "deflection", "dismissiveness"]) and
-        sentiment == "undermining"
-    ):
-        return "calculated neutrality"
-    # 13. Forced Accountability Flip
-    if (
-        (anger + disgust) > 0.5 and
-        any(p in patterns for p in ["blame shifting", "manipulation", "projection"]) and
-        sentiment == "undermining"
-    ):
-        return "forced accountability flip"
-    # 14. Conditional Affection
-    if (
-        joy > 0.4 and
-        any(p in patterns for p in ["apology baiting", "control", "recovery phase"]) and
-        sentiment == "undermining"
-    ):
-        return "conditional affection"
-
+
+    # 11. Forced Accountability Flip
     if (
         (anger + disgust) > 0.5 and
-        any(p in patterns for p in ["blame shifting", "projection", "deflection"]) and
+        any(p in patterns for p in ["blame shifting", "projection"]) and
         sentiment == "undermining"
     ):
         return "forced accountability flip"
@@ -268,9 +235,9 @@ def get_emotional_tone_tag(text, sentiment, patterns, abuse_score):
         sentiment == "undermining"
     ):
         return "emotional instability"
-
-    return None
 
+    return "neutral"
+@spaces.GPU
 def predict_darvo_score(text):
     """Predict DARVO score for given text"""
     try:
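Note: the tone rules run top-down with the first match winning, and the fall-through now returns "neutral" instead of None, so the per-message tone lines in analyze_composite always print a tag. One subtlety survives the cleanup: in the Genuine Vulnerability rule, all(p in ["recovery"] for p in patterns) is vacuously True when patterns is empty, so that tag can fire on messages with no detected patterns whenever its emotion and sentiment conditions hold. A table-driven sketch of the same first-match-wins shape (a hypothetical restructuring, not part of app.py):

# Rule order encodes priority, exactly as the if-cascade above does.
RULES = [
    ("aggressive dismissal",
     lambda e, p, s: e.get("anger", 0) > 0.5
     and any(x in p for x in ["insults", "control"]) and s == "undermining"),
    ("contradictory gaslight",
     lambda e, p, s: (e.get("joy", 0) + e.get("anger", 0) + e.get("sadness", 0)) > 0.5
     and any(x in p for x in ["gaslighting", "contradictory statements"])
     and s == "undermining"),
]

def tone_from_rules(emotions, patterns, sentiment):
    for tag, test in RULES:
        if test(emotions, patterns, sentiment):
            return tag
    return "neutral"  # fall-through mirrors the new behavior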
@@ -305,17 +272,13 @@ def get_risk_stage(patterns, sentiment):
         logger.error(f"Error determining risk stage: {e}")
         return 1
 
-
 @spaces.GPU
 def compute_abuse_score(matched_scores, sentiment):
-    """
-    Compute abuse score from matched patterns and sentiment
-    """
+    """Compute abuse score from matched patterns and sentiment"""
     try:
         if not matched_scores:
             return 0.0
 
-        # Calculate weighted score
         total_weight = sum(weight for _, _, weight in matched_scores)
         if total_weight == 0:
             return 0.0
@@ -326,7 +289,7 @@ def compute_abuse_score(matched_scores, sentiment):
         weighted_sum = sum(score * weight for _, score, weight in matched_scores)
         base_score = (weighted_sum / total_weight) * 100
 
-        # Apply multipliers
+        # Pattern combination multipliers
         if len(matched_scores) >= 3:
             base_score *= 1.2
 
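Note: a worked pass through the weighted core of compute_abuse_score as it stands after this hunk (the sentiment adjustment applied further down is outside the diff and ignored here); with only two matched patterns, the >= 3 combination multiplier does not apply:

# Illustrative numbers only.
matched_scores = [("insults", 0.80, 1.4), ("control", 0.60, 1.0)]
total_weight = sum(w for _, _, w in matched_scores)      # 2.4
weighted_sum = sum(s * w for _, s, w in matched_scores)  # 0.8*1.4 + 0.6*1.0 = 1.72
base_score = (weighted_sum / total_weight) * 100         # about 71.7
if len(matched_scores) >= 3:                             # not triggered here
    base_score *= 1.2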
@@ -358,7 +321,6 @@ def compute_abuse_score(matched_scores, sentiment):
         logger.error(f"Error computing abuse score: {e}")
         return 0.0
 
-
 @spaces.GPU
 def analyze_single_message(text, thresholds):
     """Analyze a single message for abuse patterns"""
@@ -374,19 +336,10 @@ def analyze_single_message(text, thresholds):
         explicit_abuse_words = ['fuck', 'bitch', 'shit', 'ass', 'dick']
         explicit_abuse = any(word in text.lower() for word in explicit_abuse_words)
         logger.debug(f"Explicit abuse detected: {explicit_abuse}")
-        # Get sentiment
-        sent_inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
-        sent_inputs = {k: v.to(device) for k, v in sent_inputs.items()}
-        with torch.no_grad():
-            sent_logits = sentiment_model(**sent_inputs).logits[0]
-        sent_probs = torch.softmax(sent_logits, dim=-1).cpu().numpy()
-        sentiment = SENTIMENT_LABELS[int(np.argmax(sent_probs))]
 
-        # Get tone using emotion-based approach
-        tone_tag = get_emotional_tone_tag(text, sentiment, threshold_labels, abuse_score)
         # Abuse model inference
         inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
-        inputs = {k: v.to(device) for k, v in inputs.items()} # Move to GPU
+        inputs = {k: v.to(device) for k, v in inputs.items()}
 
         with torch.no_grad():
             outputs = model(**inputs)
@@ -415,7 +368,7 @@ def analyze_single_message(text, thresholds):
             if explicit_abuse:
                 base_threshold *= 0.5
             if score > base_threshold:
-                if label not in threshold_labels:  # Avoid duplicates
+                if label not in threshold_labels:
                     threshold_labels.append(label)
 
         logger.debug("\nLabels that passed thresholds:", threshold_labels)
@@ -428,8 +381,6 @@ def analyze_single_message(text, thresholds):
             if explicit_abuse and label == "insults":
                 weight *= 1.5
             matched_scores.append((label, score, weight))
-
-        logger.debug("\nMatched scores (label, score, weight):", matched_scores)
 
         # Get sentiment
         sent_inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
@@ -438,38 +389,23 @@ def analyze_single_message(text, thresholds):
             sent_logits = sentiment_model(**sent_inputs).logits[0]
         sent_probs = torch.softmax(sent_logits, dim=-1).cpu().numpy()
         sentiment = SENTIMENT_LABELS[int(np.argmax(sent_probs))]
-        logger.debug(f"\nDetected sentiment: {sentiment}")
-
-        # Get tone
-        tone_inputs = tone_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
-        tone_inputs = {k: v.to(device) for k, v in tone_inputs.items()}
-        with torch.no_grad():
-            tone_logits = tone_model(**tone_inputs).logits[0]
-        tone_probs = torch.sigmoid(tone_logits).cpu().numpy()
-        tone_tag = TONE_LABELS[int(np.argmax(tone_probs))]
-        logger.debug(f"Detected tone: {tone_tag}")
-
-        # Get DARVO score
-        darvo_score = predict_darvo_score(text)
-        logger.debug(f"DARVO score: {darvo_score}")
 
         # Calculate abuse score
-        if not matched_scores:
-            logger.debug("No matched scores, returning 0")
-            return 0.0, [], [], {"label": "undermining"}, 2 if explicit_abuse else 1, 0.0, None
-
         abuse_score = compute_abuse_score(matched_scores, sentiment)
-
         if explicit_abuse:
             abuse_score = max(abuse_score, 70.0)
-
-        logger.debug(f"\nCalculated abuse score: {abuse_score}")
+
+        # Get DARVO score
+        darvo_score = predict_darvo_score(text)
+
+        # Get tone using emotion-based approach
+        tone_tag = get_emotional_tone_tag(text, sentiment, threshold_labels, abuse_score)
 
         # Set stage
         stage = 2 if explicit_abuse or abuse_score > 70 else 1
-        logger.debug(f"Final stage: {stage}")
 
         logger.debug("=== DEBUG END ===\n")
+
         return abuse_score, threshold_labels, matched_scores, {"label": sentiment}, stage, darvo_score, tone_tag
 
     except Exception as e:
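Note: this reordering appears to be load-bearing, not cosmetic. The early block deleted further up referenced threshold_labels and abuse_score before either was visibly assigned, and the tone_model/tone_tokenizer path deleted here used a model pair (and TONE_LABELS) that no longer exists anywhere in the file after this commit; computing DARVO and tone after compute_abuse_score gives get_emotional_tone_tag real inputs. A self-contained sketch of the dependency, with hypothetical stand-ins:

# Hypothetical stubs; the point is that the tone tag consumes abuse_score,
# so it can only be computed after the score exists.
def score(matched_scores):                   # stands in for compute_abuse_score
    return 71.7 if matched_scores else 0.0

def tone(sentiment, labels, abuse_score):    # stands in for get_emotional_tone_tag
    return "aggressive dismissal" if abuse_score > 40 else "neutral"

abuse_score = score([("insults", 0.8, 1.4)])
tone_tag = tone("undermining", ["insults"], abuse_score)  # bound, no NameError
stage = 2 if abuse_score > 70 else 1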
@@ -547,7 +483,6 @@ def analyze_composite(msg1, msg2, msg3, *answers_and_none):
     responses_checked = any(answers_and_none[:-1])
     none_selected = not responses_checked and none_selected_checked
 
-    # Determine escalation score
     if none_selected:
         escalation_score = 0
         escalation_note = "Checklist completed: no danger items reported."
@@ -599,7 +534,7 @@ def analyze_composite(msg1, msg2, msg3, *answers_and_none):
     high = {'control'}
     moderate = {'gaslighting', 'dismissiveness', 'obscure language', 'insults',
                 'contradictory statements', 'guilt tripping'}
-    low = {'blame shifting', 'projection', 'recovery phase'}
+    low = {'blame shifting', 'projection', 'recovery'}
 
     counts = {'high': 0, 'moderate': 0, 'low': 0}
     for label in predicted_labels:
@@ -610,7 +545,7 @@ def analyze_composite(msg1, msg2, msg3, *answers_and_none):
         elif label in low:
             counts['low'] += 1
 
-    # Determine pattern escalation risk
+    # Pattern escalation logic
     if counts['high'] >= 2 and counts['moderate'] >= 2:
         pattern_escalation_risk = "Critical"
     elif (counts['high'] >= 2 and counts['moderate'] >= 1) or \
@@ -683,40 +618,63 @@ def analyze_composite(msg1, msg2, msg3, *answers_and_none):
         f"• Escalation Bump: +{escalation_bump} (from DARVO, tone, intensity, etc.)"
     )
 
-    # Calculate composite abuse score
+    # Composite Abuse Score
     composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
 
     # Get most common stage
     most_common_stage = max(set(stages), key=stages.count)
     stage_text = RISK_STAGE_LABELS[most_common_stage]
 
-    # Get top labels
-    top_labels = []
-    for result, _ in results:
-        threshold_labels = result[1]
-        if threshold_labels:
-            top_labels.append(threshold_labels[0])
-        else:
-            top_labels.append("none")
+    # Build output text
+    out = f"Abuse Intensity: {composite_abuse}%\n"
+    out += "📊 This reflects the strength and severity of detected abuse patterns in the message(s).\n\n"
 
-    # Calculate average DARVO score
+    # Add risk assessment
+    risk_level = (
+        "Critical" if composite_abuse >= 85 or hybrid_score >= 20 else
+        "High" if composite_abuse >= 70 or hybrid_score >= 15 else
+        "Moderate" if composite_abuse >= 50 or hybrid_score >= 10 else
+        "Low"
+    )
+
+    risk_descriptions = {
+        "Critical": (
+            "🚨 **Risk Level: Critical**\n"
+            "Multiple severe abuse patterns detected. This situation shows signs of "
+            "dangerous escalation and immediate intervention may be needed."
+        ),
+        "High": (
+            "⚠️ **Risk Level: High**\n"
+            "Strong abuse patterns detected. This situation shows concerning "
+            "signs of manipulation and control."
+        ),
+        "Moderate": (
+            "⚡ **Risk Level: Moderate**\n"
+            "Concerning patterns detected. While not severe, these behaviors "
+            "indicate unhealthy relationship dynamics."
+        ),
+        "Low": (
+            "📝 **Risk Level: Low**\n"
+            "Minor concerning patterns detected. While present, the detected "
+            "behaviors are subtle or infrequent."
+        )
+    }
+
+    out += risk_descriptions[risk_level]
+    out += f"\n\n{stage_text}"
+
+    # Add DARVO analysis
     avg_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
-    darvo_blurb = ""
     if avg_darvo > 0.25:
         level = "moderate" if avg_darvo < 0.65 else "high"
-        darvo_blurb = f"\n\n🎭 **DARVO Score: {avg_darvo}** → This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
+        out += f"\n\n🎭 **DARVO Score: {avg_darvo}** → This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
 
-    # Build output text
-    out = f"Abuse Intensity: {composite_abuse}%\n"
-    out += "📊 This reflects the strength and severity of detected abuse patterns in the message(s).\n\n"
-    out += generate_risk_snippet(composite_abuse, top_labels[0], hybrid_score, most_common_stage)
-    out += f"\n\n{stage_text}"
-    out += darvo_blurb
+    # Add emotional tones
     out += "\n\n🎭 **Emotional Tones Detected:**\n"
     for i, tone in enumerate(tone_tags):
         out += f"• Message {i+1}: *{tone or 'none'}*\n"
 
-    # Add threat section
+    # Add threats section
     if flat_threats:
         out += "\n\n🚨 **Immediate Danger Threats Detected:**\n"
         for t in set(flat_threats):
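Note: the inline risk banner replaces the old generate_risk_snippet call and the top_labels bookkeeping that fed it, and darvo_blurb is folded directly into out. A worked pass through the new thresholds (hybrid_score comes from the escalation logic above, which is not shown in full in this diff):

# Illustrative values only.
composite_abuse, hybrid_score = 72, 8
risk_level = (
    "Critical" if composite_abuse >= 85 or hybrid_score >= 20 else
    "High" if composite_abuse >= 70 or hybrid_score >= 15 else
    "Moderate" if composite_abuse >= 50 or hybrid_score >= 10 else
    "Low"
)
assert risk_level == "High"  # 72 crosses the >= 70 composite threshold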
@@ -767,7 +725,7 @@ def create_interface():
         logger.error(f"Error creating interface: {e}")
         raise
 
-
+# Main execution
 if __name__ == "__main__":
     try:
         demo = create_interface()
@@ -777,4 +735,5 @@ if __name__ == "__main__":
             share=False
         )
     except Exception as e:
-        print(f"Error launching app: {e}")
+        logger.error(f"Failed to launch app: {e}")
+        raise