Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -130,51 +130,6 @@ ESCALATION_QUESTIONS = [
|
|
130 |
("Violence has increased in frequency or severity", 3),
|
131 |
("Partner monitors your calls/GPS/social media", 2)
|
132 |
]
|
133 |
-
DARVO_PATTERNS = [
|
134 |
-
"blame shifting", # "You're the reason this happens"
|
135 |
-
"projection", # "You're the abusive one"
|
136 |
-
"deflection", # "This isn't about that"
|
137 |
-
"dismissiveness", # "You're overreacting"
|
138 |
-
"insults", # Personal attacks that redirect attention
|
139 |
-
"aggression", # Escalates tone to destabilize
|
140 |
-
"recovery phase", # Sudden affection following aggression
|
141 |
-
"contradictory statements" # “I never said that” immediately followed by a version of what they said
|
142 |
-
]
|
143 |
-
DARVO_MOTIFS = [
|
144 |
-
"I never said that.", "You’re imagining things.", "That never happened.",
|
145 |
-
"You’re making a big deal out of nothing.", "It was just a joke.", "You’re too sensitive.",
|
146 |
-
"I don’t know what you’re talking about.", "You’re overreacting.", "I didn’t mean it that way.",
|
147 |
-
"You’re twisting my words.", "You’re remembering it wrong.", "You’re always looking for something to complain about.",
|
148 |
-
"You’re just trying to start a fight.", "I was only trying to help.", "You’re making things up.",
|
149 |
-
"You’re blowing this out of proportion.", "You’re being paranoid.", "You’re too emotional.",
|
150 |
-
"You’re always so dramatic.", "You’re just trying to make me look bad.",
|
151 |
-
|
152 |
-
"You’re crazy.", "You’re the one with the problem.", "You’re always so negative.",
|
153 |
-
"You’re just trying to control me.", "You’re the abusive one.", "You’re trying to ruin my life.",
|
154 |
-
"You’re just jealous.", "You’re the one who needs help.", "You’re always playing the victim.",
|
155 |
-
"You’re the one causing all the problems.", "You’re just trying to make me feel guilty.",
|
156 |
-
"You’re the one who can’t let go of the past.", "You’re the one who’s always angry.",
|
157 |
-
"You’re the one who’s always complaining.", "You’re the one who’s always starting arguments.",
|
158 |
-
"You’re the one who’s always making things worse.", "You’re the one who’s always making me feel bad.",
|
159 |
-
"You’re the one who’s always making me look like the bad guy.",
|
160 |
-
"You’re the one who’s always making me feel like a failure.",
|
161 |
-
"You’re the one who’s always making me feel like I’m not good enough.",
|
162 |
-
|
163 |
-
"I can’t believe you’re doing this to me.", "You’re hurting me.",
|
164 |
-
"You’re making me feel like a terrible person.", "You’re always blaming me for everything.",
|
165 |
-
"You’re the one who’s abusive.", "You’re the one who’s controlling.", "You’re the one who’s manipulative.",
|
166 |
-
"You’re the one who’s toxic.", "You’re the one who’s gaslighting me.",
|
167 |
-
"You’re the one who’s always putting me down.", "You’re the one who’s always making me feel bad.",
|
168 |
-
"You’re the one who’s always making me feel like I’m not good enough.",
|
169 |
-
"You’re the one who’s always making me feel like I’m the problem.",
|
170 |
-
"You’re the one who’s always making me feel like I’m the bad guy.",
|
171 |
-
"You’re the one who’s always making me feel like I’m the villain.",
|
172 |
-
"You’re the one who’s always making me feel like I’m the one who needs to change.",
|
173 |
-
"You’re the one who’s always making me feel like I’m the one who’s wrong.",
|
174 |
-
"You’re the one who’s always making me feel like I’m the one who’s crazy.",
|
175 |
-
"You’re the one who’s always making me feel like I’m the one who’s abusive.",
|
176 |
-
"You’re the one who’s always making me feel like I’m the one who’s toxic."
|
177 |
-
]
|
178 |
def get_emotional_tone_tag(emotions, sentiment, patterns, abuse_score):
|
179 |
sadness = emotions.get("sadness", 0)
|
180 |
joy = emotions.get("joy", 0)
|
@@ -304,42 +259,21 @@ def get_emotional_tone_tag(emotions, sentiment, patterns, abuse_score):
|
|
304 |
return "emotional instability"
|
305 |
|
306 |
return None
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
# Sentiment delta
|
323 |
-
sentiment_shift_score = max(0.0, sentiment_after - sentiment_before)
|
324 |
-
|
325 |
-
# Match against DARVO motifs more loosely
|
326 |
-
motif_hits = sum(
|
327 |
-
any(phrase.lower() in motif.lower() or motif.lower() in phrase.lower()
|
328 |
-
for phrase in DARVO_MOTIFS)
|
329 |
-
for motif in motifs_found
|
330 |
-
)
|
331 |
-
motif_score = motif_hits / max(len(DARVO_MOTIFS), 1)
|
332 |
-
|
333 |
-
# Contradiction still binary
|
334 |
-
contradiction_score = 1.0 if contradiction_flag else 0.0
|
335 |
-
|
336 |
-
# Final DARVO score
|
337 |
-
return round(min(
|
338 |
-
0.3 * pattern_hits +
|
339 |
-
0.3 * sentiment_shift_score +
|
340 |
-
0.25 * motif_score +
|
341 |
-
0.15 * contradiction_score, 1.0
|
342 |
-
), 3)
|
343 |
def detect_weapon_language(text):
|
344 |
weapon_keywords = [
|
345 |
"knife", "knives", "stab", "cut you", "cutting",
|
@@ -420,35 +354,6 @@ def generate_risk_snippet(abuse_score, top_label, escalation_score, stage):
|
|
420 |
base += "🧠 You can review the pattern in context. This tool highlights possible dynamics—not judgments."
|
421 |
return base
|
422 |
|
423 |
-
WHY_FLAGGED = {
|
424 |
-
"control": "This message may reflect efforts to restrict someone’s autonomy, even if it's framed as concern or care.",
|
425 |
-
"gaslighting": "This message could be manipulating someone into questioning their perception or feelings.",
|
426 |
-
"dismissiveness": "This message may include belittling, invalidating, or ignoring the other person’s experience.",
|
427 |
-
"insults": "Direct insults often appear in escalating abusive dynamics and can erode emotional safety.",
|
428 |
-
"blame shifting": "This message may redirect responsibility to avoid accountability, especially during conflict.",
|
429 |
-
"guilt tripping": "This message may induce guilt in order to control or manipulate behavior.",
|
430 |
-
"recovery phase": "This message may be part of a tension-reset cycle, appearing kind but avoiding change.",
|
431 |
-
"projection": "This message may involve attributing the abuser’s own behaviors to the victim.",
|
432 |
-
"contradictory statements": "This message may contain internal contradictions used to confuse, destabilize, or deflect responsibility.",
|
433 |
-
"obscure language": "This message may use overly formal, vague, or complex language to obscure meaning or avoid accountability.",
|
434 |
-
"default": "This message contains language patterns that may affect safety, clarity, or emotional autonomy."
|
435 |
-
}
|
436 |
-
explanation = WHY_FLAGGED.get(pattern_label.lower(), WHY_FLAGGED["default"])
|
437 |
-
|
438 |
-
base = f"\n\n🛑 Risk Level: {risk_level.capitalize()}\n"
|
439 |
-
base += f"This message shows strong indicators of **{pattern_label}**. "
|
440 |
-
|
441 |
-
if risk_level == "high":
|
442 |
-
base += "The language may reflect patterns of emotional control, even when expressed in soft or caring terms.\n"
|
443 |
-
elif risk_level == "moderate":
|
444 |
-
base += "There are signs of emotional pressure or indirect control that may escalate if repeated.\n"
|
445 |
-
else:
|
446 |
-
base += "The message does not strongly indicate abuse, but it's important to monitor for patterns.\n"
|
447 |
-
|
448 |
-
base += f"\n💡 *Why this might be flagged:*\n{explanation}\n"
|
449 |
-
base += f"\nDetected Pattern: **{pattern_label} ({pattern_score})**\n"
|
450 |
-
base += "🧠 You can review the pattern in context. This tool highlights possible dynamics—not judgments."
|
451 |
-
return base
|
452 |
|
453 |
# --- Step X: Detect Immediate Danger Threats ---
|
454 |
THREAT_MOTIFS = [
|
@@ -535,23 +440,14 @@ def analyze_single_message(text, thresholds):
|
|
535 |
k: v + 0.05 if sentiment == "supportive" else v
|
536 |
for k, v in thresholds.items()
|
537 |
}
|
538 |
-
|
539 |
-
contradiction_flag = detect_contradiction(text)
|
540 |
|
541 |
threshold_labels = [
|
542 |
label for label, score in zip(LABELS, scores)
|
543 |
if score > adjusted_thresholds[label]
|
544 |
]
|
545 |
tone_tag = get_emotional_tone_tag(emotion_profile, sentiment, threshold_labels, 0)
|
546 |
-
|
547 |
-
|
548 |
-
darvo_score = calculate_darvo_score(
|
549 |
-
threshold_labels,
|
550 |
-
sentiment_before=0.0,
|
551 |
-
sentiment_after=sentiment_score,
|
552 |
-
motifs_found=motifs,
|
553 |
-
contradiction_flag=contradiction_flag
|
554 |
-
)
|
555 |
|
556 |
top_patterns = sorted(
|
557 |
[(label, score) for label, score in zip(LABELS, scores)],
|
@@ -621,8 +517,6 @@ def analyze_single_message(text, thresholds):
|
|
621 |
print(f" {label:25} → {score:.3f} {passed}")
|
622 |
print(f"Matched for score: {[(l, round(s, 3)) for l, s, _ in matched_scores]}")
|
623 |
print(f"Abuse Score Raw: {round(abuse_score_raw, 1)}")
|
624 |
-
print(f"Motifs: {motifs}")
|
625 |
-
print(f"Contradiction: {contradiction_flag}")
|
626 |
print("------------------\n")
|
627 |
|
628 |
return abuse_score, threshold_labels, top_patterns, {"label": sentiment}, stage, darvo_score, tone_tag
|
|
|
130 |
("Violence has increased in frequency or severity", 3),
|
131 |
("Partner monitors your calls/GPS/social media", 2)
|
132 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
def get_emotional_tone_tag(emotions, sentiment, patterns, abuse_score):
|
134 |
sadness = emotions.get("sadness", 0)
|
135 |
joy = emotions.get("joy", 0)
|
|
|
259 |
return "emotional instability"
|
260 |
|
261 |
return None
|
262 |
+
# 🔄 New DARVO score model (regression-based)
|
263 |
+
from torch.nn.functional import sigmoid
|
264 |
+
import torch
|
265 |
+
|
266 |
+
# Load the trained DARVO regressor and its tokenizer from the Hugging Face Hub
|
267 |
+
darvo_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-darvo-regressor-v1")
|
268 |
+
darvo_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-darvo-regressor-v1", use_fast=False)
|
269 |
+
darvo_model.eval()
|
270 |
+
|
271 |
+
def predict_darvo_score(text):
    """Return the DARVO regressor's score for ``text``, squashed by a sigmoid.

    The text is tokenized with the module-level ``darvo_tokenizer`` (truncated
    to the tokenizer's limit), run through the module-level ``darvo_model``
    without gradient tracking, and the resulting logit is passed through a
    sigmoid and rounded to four decimal places.

    Args:
        text: The message to score.

    Returns:
        A float rounded to 4 decimals. Blank or whitespace-only strings
        return ``0.0`` without invoking the model.

    Raises:
        Whatever ``Tensor.item()`` raises when the model emits more than one
        logit for the input -- assumes the regressor head has a single
        output; TODO confirm against the model config.
    """
    # A blank message has nothing to score; running the model on it would
    # only score the tokenizer's special tokens and yield an arbitrary value.
    if isinstance(text, str) and not text.strip():
        return 0.0

    inputs = darvo_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():  # inference only: skip autograd bookkeeping
        logits = darvo_model(**inputs).logits
    # torch.sigmoid is the canonical spelling; the torch.nn.functional alias
    # has emitted deprecation warnings in some PyTorch releases.
    score = torch.sigmoid(logits).item()
    return round(score, 4)  # Rounded for display/output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
277 |
def detect_weapon_language(text):
|
278 |
weapon_keywords = [
|
279 |
"knife", "knives", "stab", "cut you", "cutting",
|
|
|
354 |
base += "🧠 You can review the pattern in context. This tool highlights possible dynamics—not judgments."
|
355 |
return base
|
356 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
357 |
|
358 |
# --- Step X: Detect Immediate Danger Threats ---
|
359 |
THREAT_MOTIFS = [
|
|
|
440 |
k: v + 0.05 if sentiment == "supportive" else v
|
441 |
for k, v in thresholds.items()
|
442 |
}
|
443 |
+
darvo_score = predict_darvo_score(text)
|
|
|
444 |
|
445 |
threshold_labels = [
|
446 |
label for label, score in zip(LABELS, scores)
|
447 |
if score > adjusted_thresholds[label]
|
448 |
]
|
449 |
tone_tag = get_emotional_tone_tag(emotion_profile, sentiment, threshold_labels, 0)
|
450 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
451 |
|
452 |
top_patterns = sorted(
|
453 |
[(label, score) for label, score in zip(LABELS, scores)],
|
|
|
517 |
print(f" {label:25} → {score:.3f} {passed}")
|
518 |
print(f"Matched for score: {[(l, round(s, 3)) for l, s, _ in matched_scores]}")
|
519 |
print(f"Abuse Score Raw: {round(abuse_score_raw, 1)}")
|
|
|
|
|
520 |
print("------------------\n")
|
521 |
|
522 |
return abuse_score, threshold_labels, top_patterns, {"label": sentiment}, stage, darvo_score, tone_tag
|