Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
|
|
2 |
import spaces
|
3 |
import torch
|
4 |
import numpy as np
|
5 |
-
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
6 |
import re
|
7 |
import matplotlib.pyplot as plt
|
8 |
import io
|
@@ -11,41 +11,37 @@ from datetime import datetime
|
|
11 |
from torch.nn.functional import sigmoid
|
12 |
from collections import Counter
|
13 |
import logging
|
14 |
-
from transformers import pipeline as hf_pipeline
|
15 |
-
|
16 |
-
# Add this with your other model loading code
|
17 |
-
emotion_pipeline = hf_pipeline(
|
18 |
-
"text-classification",
|
19 |
-
model="j-hartmann/emotion-english-distilroberta-base",
|
20 |
-
top_k=6,
|
21 |
-
truncation=True,
|
22 |
-
device=0 if torch.cuda.is_available() else -1 # GPU support
|
23 |
-
)
|
24 |
-
|
25 |
-
# Add this after imports
|
26 |
-
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
27 |
|
28 |
# Set up logging
|
29 |
logging.basicConfig(level=logging.DEBUG)
|
30 |
logger = logging.getLogger(__name__)
|
31 |
|
|
|
|
|
|
|
32 |
|
33 |
-
|
34 |
-
# Model initialization with error handling
|
35 |
# Model initialization
|
36 |
model_name = "SamanthaStorm/tether-multilabel-v4"
|
37 |
model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
|
38 |
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
|
39 |
|
40 |
-
|
41 |
# Sentiment model
|
42 |
sentiment_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-sentiment").to(device)
|
43 |
sentiment_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-sentiment", use_fast=False)
|
44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
# DARVO model
|
46 |
darvo_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-darvo-regressor-v1").to(device)
|
47 |
darvo_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-darvo-regressor-v1", use_fast=False)
|
48 |
-
|
49 |
|
50 |
# Constants and Labels
|
51 |
LABELS = [
|
@@ -54,12 +50,6 @@ LABELS = [
|
|
54 |
"contradictory statements", "obscure language"
|
55 |
]
|
56 |
|
57 |
-
TONE_LABELS = [
|
58 |
-
"cold invalidation", "coercive warmth", "contradictory gaslight",
|
59 |
-
"deflective hostility", "emotional instability", "nonabusive",
|
60 |
-
"performative regret", "emotional threat", "forced accountability flip"
|
61 |
-
]
|
62 |
-
|
63 |
SENTIMENT_LABELS = ["undermining", "supportive"]
|
64 |
|
65 |
THRESHOLDS = {
|
@@ -89,7 +79,6 @@ PATTERN_WEIGHTS = {
|
|
89 |
"obscure language": 0.9,
|
90 |
"nonabusive": 0.0
|
91 |
}
|
92 |
-
|
93 |
ESCALATION_QUESTIONS = [
|
94 |
("Partner has access to firearms or weapons", 4),
|
95 |
("Partner threatened to kill you", 3),
|
@@ -110,7 +99,6 @@ RISK_STAGE_LABELS = {
|
|
110 |
4: "πΈ Risk Stage: Calm / Honeymoon\nThis message appears supportive but may follow prior harm, minimizing it."
|
111 |
}
|
112 |
|
113 |
-
# Threat Motifs
|
114 |
THREAT_MOTIFS = [
|
115 |
"i'll kill you", "i'm going to hurt you", "you're dead", "you won't survive this",
|
116 |
"i'll break your face", "i'll bash your head in", "i'll snap your neck",
|
@@ -134,14 +122,16 @@ THREAT_MOTIFS = [
|
|
134 |
"if you just behaved, this wouldn't happen", "this is your fault",
|
135 |
"you're making me hurt you", "i warned you", "you should have listened"
|
136 |
]
|
|
|
137 |
def get_emotion_profile(text):
|
|
|
138 |
emotions = emotion_pipeline(text)
|
139 |
if isinstance(emotions, list) and isinstance(emotions[0], list):
|
140 |
emotions = emotions[0]
|
141 |
return {e['label'].lower(): round(e['score'], 3) for e in emotions}
|
142 |
|
143 |
def get_emotional_tone_tag(text, sentiment, patterns, abuse_score):
|
144 |
-
|
145 |
emotions = get_emotion_profile(text)
|
146 |
|
147 |
sadness = emotions.get("sadness", 0)
|
@@ -151,10 +141,10 @@ def get_emotional_tone_tag(text, sentiment, patterns, abuse_score):
|
|
151 |
anger = emotions.get("anger", 0)
|
152 |
fear = emotions.get("fear", 0)
|
153 |
|
154 |
-
|
155 |
if (
|
156 |
sadness > 0.4 and
|
157 |
-
any(p in patterns for p in ["blame shifting", "guilt tripping", "recovery
|
158 |
(sentiment == "undermining" or abuse_score > 40)
|
159 |
):
|
160 |
return "performative regret"
|
@@ -172,13 +162,14 @@ def get_emotional_tone_tag(text, sentiment, patterns, abuse_score):
|
|
172 |
(neutral + disgust) > 0.5 and
|
173 |
any(p in patterns for p in ["dismissiveness", "projection", "obscure language"]) and
|
174 |
sentiment == "undermining"
|
175 |
-
):
|
|
|
176 |
|
177 |
# 4. Genuine Vulnerability
|
178 |
if (
|
179 |
(sadness + fear) > 0.5 and
|
180 |
sentiment == "supportive" and
|
181 |
-
all(p in ["recovery
|
182 |
):
|
183 |
return "genuine vulnerability"
|
184 |
|
@@ -205,59 +196,35 @@ def get_emotional_tone_tag(text, sentiment, patterns, abuse_score):
|
|
205 |
sentiment == "undermining"
|
206 |
):
|
207 |
return "toxic resignation"
|
208 |
-
|
|
|
209 |
if (
|
210 |
anger > 0.5 and
|
211 |
-
any(p in patterns for p in ["
|
212 |
sentiment == "undermining"
|
213 |
):
|
214 |
return "aggressive dismissal"
|
|
|
215 |
# 9. Deflective Hostility
|
216 |
if (
|
217 |
(0.2 < anger < 0.7 or 0.2 < disgust < 0.7) and
|
218 |
-
any(p in patterns for p in ["
|
219 |
-
sentiment == "undermining"
|
220 |
-
):
|
221 |
-
return "deflective hostility"
|
222 |
-
# 10. Mocking Detachment
|
223 |
-
if (
|
224 |
-
(neutral + joy) > 0.5 and
|
225 |
-
any(p in patterns for p in ["mockery", "insults", "projection"]) and
|
226 |
sentiment == "undermining"
|
227 |
):
|
228 |
-
return "
|
229 |
-
|
|
|
230 |
if (
|
231 |
(joy + anger + sadness) > 0.5 and
|
232 |
any(p in patterns for p in ["gaslighting", "contradictory statements"]) and
|
233 |
sentiment == "undermining"
|
234 |
):
|
235 |
return "contradictory gaslight"
|
236 |
-
|
237 |
-
|
238 |
-
neutral > 0.6 and
|
239 |
-
any(p in patterns for p in ["obscure language", "deflection", "dismissiveness"]) and
|
240 |
-
sentiment == "undermining"
|
241 |
-
):
|
242 |
-
return "calculated neutrality"
|
243 |
-
# 13. Forced Accountability Flip
|
244 |
-
if (
|
245 |
-
(anger + disgust) > 0.5 and
|
246 |
-
any(p in patterns for p in ["blame shifting", "manipulation", "projection"]) and
|
247 |
-
sentiment == "undermining"
|
248 |
-
):
|
249 |
-
return "forced accountability flip"
|
250 |
-
# 14. Conditional Affection
|
251 |
-
if (
|
252 |
-
joy > 0.4 and
|
253 |
-
any(p in patterns for p in ["apology baiting", "control", "recovery phase"]) and
|
254 |
-
sentiment == "undermining"
|
255 |
-
):
|
256 |
-
return "conditional affection"
|
257 |
-
|
258 |
if (
|
259 |
(anger + disgust) > 0.5 and
|
260 |
-
any(p in patterns for p in ["blame shifting", "projection"
|
261 |
sentiment == "undermining"
|
262 |
):
|
263 |
return "forced accountability flip"
|
@@ -268,9 +235,9 @@ def get_emotional_tone_tag(text, sentiment, patterns, abuse_score):
|
|
268 |
sentiment == "undermining"
|
269 |
):
|
270 |
return "emotional instability"
|
271 |
-
|
272 |
-
return None
|
273 |
|
|
|
|
|
274 |
def predict_darvo_score(text):
|
275 |
"""Predict DARVO score for given text"""
|
276 |
try:
|
@@ -305,17 +272,13 @@ def get_risk_stage(patterns, sentiment):
|
|
305 |
logger.error(f"Error determining risk stage: {e}")
|
306 |
return 1
|
307 |
|
308 |
-
|
309 |
@spaces.GPU
|
310 |
def compute_abuse_score(matched_scores, sentiment):
|
311 |
-
"""
|
312 |
-
Compute abuse score from matched patterns and sentiment
|
313 |
-
"""
|
314 |
try:
|
315 |
if not matched_scores:
|
316 |
return 0.0
|
317 |
|
318 |
-
# Calculate weighted score
|
319 |
total_weight = sum(weight for _, _, weight in matched_scores)
|
320 |
if total_weight == 0:
|
321 |
return 0.0
|
@@ -326,7 +289,7 @@ def compute_abuse_score(matched_scores, sentiment):
|
|
326 |
weighted_sum = sum(score * weight for _, score, weight in matched_scores)
|
327 |
base_score = (weighted_sum / total_weight) * 100
|
328 |
|
329 |
-
#
|
330 |
if len(matched_scores) >= 3:
|
331 |
base_score *= 1.2
|
332 |
|
@@ -358,7 +321,6 @@ def compute_abuse_score(matched_scores, sentiment):
|
|
358 |
logger.error(f"Error computing abuse score: {e}")
|
359 |
return 0.0
|
360 |
|
361 |
-
|
362 |
@spaces.GPU
|
363 |
def analyze_single_message(text, thresholds):
|
364 |
"""Analyze a single message for abuse patterns"""
|
@@ -374,19 +336,10 @@ def analyze_single_message(text, thresholds):
|
|
374 |
explicit_abuse_words = ['fuck', 'bitch', 'shit', 'ass', 'dick']
|
375 |
explicit_abuse = any(word in text.lower() for word in explicit_abuse_words)
|
376 |
logger.debug(f"Explicit abuse detected: {explicit_abuse}")
|
377 |
-
# Get sentiment
|
378 |
-
sent_inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
|
379 |
-
sent_inputs = {k: v.to(device) for k, v in sent_inputs.items()}
|
380 |
-
with torch.no_grad():
|
381 |
-
sent_logits = sentiment_model(**sent_inputs).logits[0]
|
382 |
-
sent_probs = torch.softmax(sent_logits, dim=-1).cpu().numpy()
|
383 |
-
sentiment = SENTIMENT_LABELS[int(np.argmax(sent_probs))]
|
384 |
|
385 |
-
# Get tone using emotion-based approach
|
386 |
-
tone_tag = get_emotional_tone_tag(text, sentiment, threshold_labels, abuse_score)
|
387 |
# Abuse model inference
|
388 |
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
|
389 |
-
inputs = {k: v.to(device) for k, v in inputs.items()}
|
390 |
|
391 |
with torch.no_grad():
|
392 |
outputs = model(**inputs)
|
@@ -415,7 +368,7 @@ def analyze_single_message(text, thresholds):
|
|
415 |
if explicit_abuse:
|
416 |
base_threshold *= 0.5
|
417 |
if score > base_threshold:
|
418 |
-
if label not in threshold_labels:
|
419 |
threshold_labels.append(label)
|
420 |
|
421 |
logger.debug("\nLabels that passed thresholds:", threshold_labels)
|
@@ -428,8 +381,6 @@ def analyze_single_message(text, thresholds):
|
|
428 |
if explicit_abuse and label == "insults":
|
429 |
weight *= 1.5
|
430 |
matched_scores.append((label, score, weight))
|
431 |
-
|
432 |
-
logger.debug("\nMatched scores (label, score, weight):", matched_scores)
|
433 |
|
434 |
# Get sentiment
|
435 |
sent_inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
|
@@ -438,38 +389,23 @@ def analyze_single_message(text, thresholds):
|
|
438 |
sent_logits = sentiment_model(**sent_inputs).logits[0]
|
439 |
sent_probs = torch.softmax(sent_logits, dim=-1).cpu().numpy()
|
440 |
sentiment = SENTIMENT_LABELS[int(np.argmax(sent_probs))]
|
441 |
-
logger.debug(f"\nDetected sentiment: {sentiment}")
|
442 |
-
|
443 |
-
# Get tone
|
444 |
-
tone_inputs = tone_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
|
445 |
-
tone_inputs = {k: v.to(device) for k, v in tone_inputs.items()}
|
446 |
-
with torch.no_grad():
|
447 |
-
tone_logits = tone_model(**tone_inputs).logits[0]
|
448 |
-
tone_probs = torch.sigmoid(tone_logits).cpu().numpy()
|
449 |
-
tone_tag = TONE_LABELS[int(np.argmax(tone_probs))]
|
450 |
-
logger.debug(f"Detected tone: {tone_tag}")
|
451 |
-
|
452 |
-
# Get DARVO score
|
453 |
-
darvo_score = predict_darvo_score(text)
|
454 |
-
logger.debug(f"DARVO score: {darvo_score}")
|
455 |
|
456 |
# Calculate abuse score
|
457 |
-
if not matched_scores:
|
458 |
-
logger.debug("No matched scores, returning 0")
|
459 |
-
return 0.0, [], [], {"label": "undermining"}, 2 if explicit_abuse else 1, 0.0, None
|
460 |
-
|
461 |
abuse_score = compute_abuse_score(matched_scores, sentiment)
|
462 |
-
|
463 |
if explicit_abuse:
|
464 |
abuse_score = max(abuse_score, 70.0)
|
465 |
-
|
466 |
-
|
|
|
|
|
|
|
|
|
467 |
|
468 |
# Set stage
|
469 |
stage = 2 if explicit_abuse or abuse_score > 70 else 1
|
470 |
-
logger.debug(f"Final stage: {stage}")
|
471 |
|
472 |
logger.debug("=== DEBUG END ===\n")
|
|
|
473 |
return abuse_score, threshold_labels, matched_scores, {"label": sentiment}, stage, darvo_score, tone_tag
|
474 |
|
475 |
except Exception as e:
|
@@ -547,7 +483,6 @@ def analyze_composite(msg1, msg2, msg3, *answers_and_none):
|
|
547 |
responses_checked = any(answers_and_none[:-1])
|
548 |
none_selected = not responses_checked and none_selected_checked
|
549 |
|
550 |
-
# Determine escalation score
|
551 |
if none_selected:
|
552 |
escalation_score = 0
|
553 |
escalation_note = "Checklist completed: no danger items reported."
|
@@ -599,7 +534,7 @@ def analyze_composite(msg1, msg2, msg3, *answers_and_none):
|
|
599 |
high = {'control'}
|
600 |
moderate = {'gaslighting', 'dismissiveness', 'obscure language', 'insults',
|
601 |
'contradictory statements', 'guilt tripping'}
|
602 |
-
low = {'blame shifting', 'projection', 'recovery
|
603 |
|
604 |
counts = {'high': 0, 'moderate': 0, 'low': 0}
|
605 |
for label in predicted_labels:
|
@@ -610,7 +545,7 @@ def analyze_composite(msg1, msg2, msg3, *answers_and_none):
|
|
610 |
elif label in low:
|
611 |
counts['low'] += 1
|
612 |
|
613 |
-
#
|
614 |
if counts['high'] >= 2 and counts['moderate'] >= 2:
|
615 |
pattern_escalation_risk = "Critical"
|
616 |
elif (counts['high'] >= 2 and counts['moderate'] >= 1) or \
|
@@ -683,40 +618,63 @@ def analyze_composite(msg1, msg2, msg3, *answers_and_none):
|
|
683 |
f"β’ Escalation Bump: +{escalation_bump} (from DARVO, tone, intensity, etc.)"
|
684 |
)
|
685 |
|
686 |
-
#
|
687 |
composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
|
688 |
|
689 |
# Get most common stage
|
690 |
most_common_stage = max(set(stages), key=stages.count)
|
691 |
stage_text = RISK_STAGE_LABELS[most_common_stage]
|
692 |
|
693 |
-
#
|
694 |
-
|
695 |
-
|
696 |
-
threshold_labels = result[1]
|
697 |
-
if threshold_labels:
|
698 |
-
top_labels.append(threshold_labels[0])
|
699 |
-
else:
|
700 |
-
top_labels.append("none")
|
701 |
|
702 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
703 |
avg_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
|
704 |
-
darvo_blurb = ""
|
705 |
if avg_darvo > 0.25:
|
706 |
level = "moderate" if avg_darvo < 0.65 else "high"
|
707 |
-
|
708 |
|
709 |
-
#
|
710 |
-
out = f"Abuse Intensity: {composite_abuse}%\n"
|
711 |
-
out += "π This reflects the strength and severity of detected abuse patterns in the message(s).\n\n"
|
712 |
-
out += generate_risk_snippet(composite_abuse, top_labels[0], hybrid_score, most_common_stage)
|
713 |
-
out += f"\n\n{stage_text}"
|
714 |
-
out += darvo_blurb
|
715 |
out += "\n\nπ **Emotional Tones Detected:**\n"
|
716 |
for i, tone in enumerate(tone_tags):
|
717 |
out += f"β’ Message {i+1}: *{tone or 'none'}*\n"
|
718 |
|
719 |
-
# Add
|
720 |
if flat_threats:
|
721 |
out += "\n\nπ¨ **Immediate Danger Threats Detected:**\n"
|
722 |
for t in set(flat_threats):
|
@@ -767,7 +725,7 @@ def create_interface():
|
|
767 |
logger.error(f"Error creating interface: {e}")
|
768 |
raise
|
769 |
|
770 |
-
|
771 |
if __name__ == "__main__":
|
772 |
try:
|
773 |
demo = create_interface()
|
@@ -777,4 +735,5 @@ if __name__ == "__main__":
|
|
777 |
share=False
|
778 |
)
|
779 |
except Exception as e:
|
780 |
-
|
|
|
|
2 |
import spaces
|
3 |
import torch
|
4 |
import numpy as np
|
5 |
+
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline as hf_pipeline
|
6 |
import re
|
7 |
import matplotlib.pyplot as plt
|
8 |
import io
|
|
|
11 |
from torch.nn.functional import sigmoid
|
12 |
from collections import Counter
|
13 |
import logging
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
# Set up logging
|
16 |
logging.basicConfig(level=logging.DEBUG)
|
17 |
logger = logging.getLogger(__name__)
|
18 |
|
19 |
+
# Device configuration
|
20 |
+
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
21 |
+
logger.info(f"Using device: {device}")
|
22 |
|
|
|
|
|
23 |
# Model initialization
|
24 |
model_name = "SamanthaStorm/tether-multilabel-v4"
|
25 |
model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
|
26 |
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
|
27 |
|
|
|
28 |
# Sentiment model
|
29 |
sentiment_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-sentiment").to(device)
|
30 |
sentiment_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-sentiment", use_fast=False)
|
31 |
|
32 |
+
# Emotion pipeline
|
33 |
+
emotion_pipeline = hf_pipeline(
|
34 |
+
"text-classification",
|
35 |
+
model="j-hartmann/emotion-english-distilroberta-base",
|
36 |
+
top_k=6,
|
37 |
+
truncation=True,
|
38 |
+
device=0 if torch.cuda.is_available() else -1
|
39 |
+
)
|
40 |
+
|
41 |
# DARVO model
|
42 |
darvo_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-darvo-regressor-v1").to(device)
|
43 |
darvo_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-darvo-regressor-v1", use_fast=False)
|
44 |
+
darvo_model.eval()
|
45 |
|
46 |
# Constants and Labels
|
47 |
LABELS = [
|
|
|
50 |
"contradictory statements", "obscure language"
|
51 |
]
|
52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
SENTIMENT_LABELS = ["undermining", "supportive"]
|
54 |
|
55 |
THRESHOLDS = {
|
|
|
79 |
"obscure language": 0.9,
|
80 |
"nonabusive": 0.0
|
81 |
}
|
|
|
82 |
ESCALATION_QUESTIONS = [
|
83 |
("Partner has access to firearms or weapons", 4),
|
84 |
("Partner threatened to kill you", 3),
|
|
|
99 |
4: "πΈ Risk Stage: Calm / Honeymoon\nThis message appears supportive but may follow prior harm, minimizing it."
|
100 |
}
|
101 |
|
|
|
102 |
THREAT_MOTIFS = [
|
103 |
"i'll kill you", "i'm going to hurt you", "you're dead", "you won't survive this",
|
104 |
"i'll break your face", "i'll bash your head in", "i'll snap your neck",
|
|
|
122 |
"if you just behaved, this wouldn't happen", "this is your fault",
|
123 |
"you're making me hurt you", "i warned you", "you should have listened"
|
124 |
]
|
125 |
+
|
126 |
def get_emotion_profile(text):
|
127 |
+
"""Get emotion profile from text"""
|
128 |
emotions = emotion_pipeline(text)
|
129 |
if isinstance(emotions, list) and isinstance(emotions[0], list):
|
130 |
emotions = emotions[0]
|
131 |
return {e['label'].lower(): round(e['score'], 3) for e in emotions}
|
132 |
|
133 |
def get_emotional_tone_tag(text, sentiment, patterns, abuse_score):
|
134 |
+
"""Get emotional tone tag based on emotions and patterns"""
|
135 |
emotions = get_emotion_profile(text)
|
136 |
|
137 |
sadness = emotions.get("sadness", 0)
|
|
|
141 |
anger = emotions.get("anger", 0)
|
142 |
fear = emotions.get("fear", 0)
|
143 |
|
144 |
+
# 1. Performative Regret
|
145 |
if (
|
146 |
sadness > 0.4 and
|
147 |
+
any(p in patterns for p in ["blame shifting", "guilt tripping", "recovery"]) and
|
148 |
(sentiment == "undermining" or abuse_score > 40)
|
149 |
):
|
150 |
return "performative regret"
|
|
|
162 |
(neutral + disgust) > 0.5 and
|
163 |
any(p in patterns for p in ["dismissiveness", "projection", "obscure language"]) and
|
164 |
sentiment == "undermining"
|
165 |
+
):
|
166 |
+
return "cold invalidation"
|
167 |
|
168 |
# 4. Genuine Vulnerability
|
169 |
if (
|
170 |
(sadness + fear) > 0.5 and
|
171 |
sentiment == "supportive" and
|
172 |
+
all(p in ["recovery"] for p in patterns)
|
173 |
):
|
174 |
return "genuine vulnerability"
|
175 |
|
|
|
196 |
sentiment == "undermining"
|
197 |
):
|
198 |
return "toxic resignation"
|
199 |
+
|
200 |
+
# 8. Aggressive Dismissal
|
201 |
if (
|
202 |
anger > 0.5 and
|
203 |
+
any(p in patterns for p in ["insults", "control"]) and
|
204 |
sentiment == "undermining"
|
205 |
):
|
206 |
return "aggressive dismissal"
|
207 |
+
|
208 |
# 9. Deflective Hostility
|
209 |
if (
|
210 |
(0.2 < anger < 0.7 or 0.2 < disgust < 0.7) and
|
211 |
+
any(p in patterns for p in ["projection"]) and
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
212 |
sentiment == "undermining"
|
213 |
):
|
214 |
+
return "deflective hostility"
|
215 |
+
|
216 |
+
# 10. Contradictory Gaslight
|
217 |
if (
|
218 |
(joy + anger + sadness) > 0.5 and
|
219 |
any(p in patterns for p in ["gaslighting", "contradictory statements"]) and
|
220 |
sentiment == "undermining"
|
221 |
):
|
222 |
return "contradictory gaslight"
|
223 |
+
|
224 |
+
# 11. Forced Accountability Flip
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
225 |
if (
|
226 |
(anger + disgust) > 0.5 and
|
227 |
+
any(p in patterns for p in ["blame shifting", "projection"]) and
|
228 |
sentiment == "undermining"
|
229 |
):
|
230 |
return "forced accountability flip"
|
|
|
235 |
sentiment == "undermining"
|
236 |
):
|
237 |
return "emotional instability"
|
|
|
|
|
238 |
|
239 |
+
return "neutral"
|
240 |
+
@spaces.GPU
|
241 |
def predict_darvo_score(text):
|
242 |
"""Predict DARVO score for given text"""
|
243 |
try:
|
|
|
272 |
logger.error(f"Error determining risk stage: {e}")
|
273 |
return 1
|
274 |
|
|
|
275 |
@spaces.GPU
|
276 |
def compute_abuse_score(matched_scores, sentiment):
|
277 |
+
"""Compute abuse score from matched patterns and sentiment"""
|
|
|
|
|
278 |
try:
|
279 |
if not matched_scores:
|
280 |
return 0.0
|
281 |
|
|
|
282 |
total_weight = sum(weight for _, _, weight in matched_scores)
|
283 |
if total_weight == 0:
|
284 |
return 0.0
|
|
|
289 |
weighted_sum = sum(score * weight for _, score, weight in matched_scores)
|
290 |
base_score = (weighted_sum / total_weight) * 100
|
291 |
|
292 |
+
# Pattern combination multipliers
|
293 |
if len(matched_scores) >= 3:
|
294 |
base_score *= 1.2
|
295 |
|
|
|
321 |
logger.error(f"Error computing abuse score: {e}")
|
322 |
return 0.0
|
323 |
|
|
|
324 |
@spaces.GPU
|
325 |
def analyze_single_message(text, thresholds):
|
326 |
"""Analyze a single message for abuse patterns"""
|
|
|
336 |
explicit_abuse_words = ['fuck', 'bitch', 'shit', 'ass', 'dick']
|
337 |
explicit_abuse = any(word in text.lower() for word in explicit_abuse_words)
|
338 |
logger.debug(f"Explicit abuse detected: {explicit_abuse}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
339 |
|
|
|
|
|
340 |
# Abuse model inference
|
341 |
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
|
342 |
+
inputs = {k: v.to(device) for k, v in inputs.items()}
|
343 |
|
344 |
with torch.no_grad():
|
345 |
outputs = model(**inputs)
|
|
|
368 |
if explicit_abuse:
|
369 |
base_threshold *= 0.5
|
370 |
if score > base_threshold:
|
371 |
+
if label not in threshold_labels:
|
372 |
threshold_labels.append(label)
|
373 |
|
374 |
logger.debug("\nLabels that passed thresholds:", threshold_labels)
|
|
|
381 |
if explicit_abuse and label == "insults":
|
382 |
weight *= 1.5
|
383 |
matched_scores.append((label, score, weight))
|
|
|
|
|
384 |
|
385 |
# Get sentiment
|
386 |
sent_inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
|
|
|
389 |
sent_logits = sentiment_model(**sent_inputs).logits[0]
|
390 |
sent_probs = torch.softmax(sent_logits, dim=-1).cpu().numpy()
|
391 |
sentiment = SENTIMENT_LABELS[int(np.argmax(sent_probs))]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
392 |
|
393 |
# Calculate abuse score
|
|
|
|
|
|
|
|
|
394 |
abuse_score = compute_abuse_score(matched_scores, sentiment)
|
|
|
395 |
if explicit_abuse:
|
396 |
abuse_score = max(abuse_score, 70.0)
|
397 |
+
|
398 |
+
# Get DARVO score
|
399 |
+
darvo_score = predict_darvo_score(text)
|
400 |
+
|
401 |
+
# Get tone using emotion-based approach
|
402 |
+
tone_tag = get_emotional_tone_tag(text, sentiment, threshold_labels, abuse_score)
|
403 |
|
404 |
# Set stage
|
405 |
stage = 2 if explicit_abuse or abuse_score > 70 else 1
|
|
|
406 |
|
407 |
logger.debug("=== DEBUG END ===\n")
|
408 |
+
|
409 |
return abuse_score, threshold_labels, matched_scores, {"label": sentiment}, stage, darvo_score, tone_tag
|
410 |
|
411 |
except Exception as e:
|
|
|
483 |
responses_checked = any(answers_and_none[:-1])
|
484 |
none_selected = not responses_checked and none_selected_checked
|
485 |
|
|
|
486 |
if none_selected:
|
487 |
escalation_score = 0
|
488 |
escalation_note = "Checklist completed: no danger items reported."
|
|
|
534 |
high = {'control'}
|
535 |
moderate = {'gaslighting', 'dismissiveness', 'obscure language', 'insults',
|
536 |
'contradictory statements', 'guilt tripping'}
|
537 |
+
low = {'blame shifting', 'projection', 'recovery'}
|
538 |
|
539 |
counts = {'high': 0, 'moderate': 0, 'low': 0}
|
540 |
for label in predicted_labels:
|
|
|
545 |
elif label in low:
|
546 |
counts['low'] += 1
|
547 |
|
548 |
+
# Pattern escalation logic
|
549 |
if counts['high'] >= 2 and counts['moderate'] >= 2:
|
550 |
pattern_escalation_risk = "Critical"
|
551 |
elif (counts['high'] >= 2 and counts['moderate'] >= 1) or \
|
|
|
618 |
f"β’ Escalation Bump: +{escalation_bump} (from DARVO, tone, intensity, etc.)"
|
619 |
)
|
620 |
|
621 |
+
# Composite Abuse Score
|
622 |
composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
|
623 |
|
624 |
# Get most common stage
|
625 |
most_common_stage = max(set(stages), key=stages.count)
|
626 |
stage_text = RISK_STAGE_LABELS[most_common_stage]
|
627 |
|
628 |
+
# Build output text
|
629 |
+
out = f"Abuse Intensity: {composite_abuse}%\n"
|
630 |
+
out += "π This reflects the strength and severity of detected abuse patterns in the message(s).\n\n"
|
|
|
|
|
|
|
|
|
|
|
631 |
|
632 |
+
# Add risk assessment
|
633 |
+
risk_level = (
|
634 |
+
"Critical" if composite_abuse >= 85 or hybrid_score >= 20 else
|
635 |
+
"High" if composite_abuse >= 70 or hybrid_score >= 15 else
|
636 |
+
"Moderate" if composite_abuse >= 50 or hybrid_score >= 10 else
|
637 |
+
"Low"
|
638 |
+
)
|
639 |
+
|
640 |
+
risk_descriptions = {
|
641 |
+
"Critical": (
|
642 |
+
"π¨ **Risk Level: Critical**\n"
|
643 |
+
"Multiple severe abuse patterns detected. This situation shows signs of "
|
644 |
+
"dangerous escalation and immediate intervention may be needed."
|
645 |
+
),
|
646 |
+
"High": (
|
647 |
+
"β οΈ **Risk Level: High**\n"
|
648 |
+
"Strong abuse patterns detected. This situation shows concerning "
|
649 |
+
"signs of manipulation and control."
|
650 |
+
),
|
651 |
+
"Moderate": (
|
652 |
+
"β‘ **Risk Level: Moderate**\n"
|
653 |
+
"Concerning patterns detected. While not severe, these behaviors "
|
654 |
+
"indicate unhealthy relationship dynamics."
|
655 |
+
),
|
656 |
+
"Low": (
|
657 |
+
"π **Risk Level: Low**\n"
|
658 |
+
"Minor concerning patterns detected. While present, the detected "
|
659 |
+
"behaviors are subtle or infrequent."
|
660 |
+
)
|
661 |
+
}
|
662 |
+
|
663 |
+
out += risk_descriptions[risk_level]
|
664 |
+
out += f"\n\n{stage_text}"
|
665 |
+
|
666 |
+
# Add DARVO analysis
|
667 |
avg_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
|
|
|
668 |
if avg_darvo > 0.25:
|
669 |
level = "moderate" if avg_darvo < 0.65 else "high"
|
670 |
+
out += f"\n\nπ **DARVO Score: {avg_darvo}** β This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
|
671 |
|
672 |
+
# Add emotional tones
|
|
|
|
|
|
|
|
|
|
|
673 |
out += "\n\nπ **Emotional Tones Detected:**\n"
|
674 |
for i, tone in enumerate(tone_tags):
|
675 |
out += f"β’ Message {i+1}: *{tone or 'none'}*\n"
|
676 |
|
677 |
+
# Add threats section
|
678 |
if flat_threats:
|
679 |
out += "\n\nπ¨ **Immediate Danger Threats Detected:**\n"
|
680 |
for t in set(flat_threats):
|
|
|
725 |
logger.error(f"Error creating interface: {e}")
|
726 |
raise
|
727 |
|
728 |
+
# Main execution
|
729 |
if __name__ == "__main__":
|
730 |
try:
|
731 |
demo = create_interface()
|
|
|
735 |
share=False
|
736 |
)
|
737 |
except Exception as e:
|
738 |
+
logger.error(f"Failed to launch app: {e}")
|
739 |
+
raise
|