Spaces:

SamanthaStorm
/

Tether

Running on Zero

App Files Files Community

SamanthaStorm committed on May 21

Commit

b9947a5

verified ·

1 Parent(s): 666c665

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -123

app.py CHANGED Viewed

@@ -130,51 +130,6 @@ ESCALATION_QUESTIONS = [
     ("Violence has increased in frequency or severity", 3),
     ("Partner monitors your calls/GPS/social media", 2)
 ]
-DARVO_PATTERNS = [  # Pattern labels that calculate_darvo_score counts as DARVO-related (membership test on the lowercased label)
-    "blame shifting",         # "You're the reason this happens"
-    "projection",             # "You're the abusive one"
-    "deflection",             # "This isn't about that"
-    "dismissiveness",         # "You're overreacting"
-    "insults",                # Personal attacks that redirect attention
-    "aggression",             # Escalates tone to destabilize
-    "recovery phase",         # Sudden affection following aggression
-    "contradictory statements" # “I never said that” immediately followed by a version of what they said
-]
-DARVO_MOTIFS = [  # Phrases that calculate_darvo_score substring-matches (case-insensitive, either direction) against detected motifs; len() of this list is the motif_score denominator
-    "I never said that.", "You’re imagining things.", "That never happened.",
-    "You’re making a big deal out of nothing.", "It was just a joke.", "You’re too sensitive.",
-    "I don’t know what you’re talking about.", "You’re overreacting.", "I didn’t mean it that way.",
-    "You’re twisting my words.", "You’re remembering it wrong.", "You’re always looking for something to complain about.",
-    "You’re just trying to start a fight.", "I was only trying to help.", "You’re making things up.",
-    "You’re blowing this out of proportion.", "You’re being paranoid.", "You’re too emotional.",
-    "You’re always so dramatic.", "You’re just trying to make me look bad.",
-    "You’re crazy.", "You’re the one with the problem.", "You’re always so negative.",
-    "You’re just trying to control me.", "You’re the abusive one.", "You’re trying to ruin my life.",
-    "You’re just jealous.", "You’re the one who needs help.", "You’re always playing the victim.",
-    "You’re the one causing all the problems.", "You’re just trying to make me feel guilty.",
-    "You’re the one who can’t let go of the past.", "You’re the one who’s always angry.",
-    "You’re the one who’s always complaining.", "You’re the one who’s always starting arguments.",
-    "You’re the one who’s always making things worse.", "You’re the one who’s always making me feel bad.",
-    "You’re the one who’s always making me look like the bad guy.",
-    "You’re the one who’s always making me feel like a failure.",
-    "You’re the one who’s always making me feel like I’m not good enough.",
-    "I can’t believe you’re doing this to me.", "You’re hurting me.",
-    "You’re making me feel like a terrible person.", "You’re always blaming me for everything.",
-    "You’re the one who’s abusive.", "You’re the one who’s controlling.", "You’re the one who’s manipulative.",
-    "You’re the one who’s toxic.", "You’re the one who’s gaslighting me.",
-    "You’re the one who’s always putting me down.", "You’re the one who’s always making me feel bad.",  # NOTE(review): the second phrase repeats an entry that appears earlier in this list
-    "You’re the one who’s always making me feel like I’m not good enough.",  # NOTE(review): repeats an entry that appears earlier in this list
-    "You’re the one who’s always making me feel like I’m the problem.",
-    "You’re the one who’s always making me feel like I’m the bad guy.",
-    "You’re the one who’s always making me feel like I’m the villain.",
-    "You’re the one who’s always making me feel like I’m the one who needs to change.",
-    "You’re the one who’s always making me feel like I’m the one who’s wrong.",
-    "You’re the one who’s always making me feel like I’m the one who’s crazy.",
-    "You’re the one who’s always making me feel like I’m the one who’s abusive.",
-    "You’re the one who’s always making me feel like I’m the one who’s toxic."
-]
 def get_emotional_tone_tag(emotions, sentiment, patterns, abuse_score):
     sadness = emotions.get("sadness", 0)
     joy = emotions.get("joy", 0)
@@ -304,42 +259,21 @@ def get_emotional_tone_tag(emotions, sentiment, patterns, abuse_score):
         return "emotional instability"
     return None
-def detect_contradiction(message):  # Return True if message matches any of the "opening phrase followed by a reversal" regexes below, else False.
-    patterns = [  # (regex, flags) pairs; .{0,15} allows at most 15 non-newline characters between the two phrases
-        (r"\b(i love you).{0,15}(i hate you|you ruin everything)", re.IGNORECASE),
-        (r"\b(i’m sorry).{0,15}(but you|if you hadn’t)", re.IGNORECASE),  # NOTE(review): this and the patterns below contain the typographic apostrophe (’); text typed with a straight ' will not match
-        (r"\b(i’m trying).{0,15}(you never|why do you)", re.IGNORECASE),
-        (r"\b(do what you want).{0,15}(you’ll regret it|i always give everything)", re.IGNORECASE),
-        (r"\b(i don’t care).{0,15}(you never think of me)", re.IGNORECASE),
-        (r"\b(i guess i’m just).{0,15}(the bad guy|worthless|never enough)", re.IGNORECASE)
-    ]
-    return any(re.search(p, message, flags) for p, flags in patterns)  # stops at the first matching pattern; matching is case-insensitive via the per-pattern flags
-def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):  # Weighted heuristic DARVO score: combines four components, caps the sum at 1.0 and rounds to 3 decimals.
-    # Count all detected DARVO-related patterns
-    pattern_hits = sum(1 for p in patterns if p.lower() in DARVO_PATTERNS)  # raw count, not normalized; NOTE(review): with the 0.3 weight below, four or more hits alone reach the 1.0 cap
-    # Sentiment delta
-    sentiment_shift_score = max(0.0, sentiment_after - sentiment_before)  # only an increase contributes; a decrease is floored to 0.0
-    # Match against DARVO motifs more loosely
-    motif_hits = sum(  # counts each found motif that contains, or is contained in, at least one DARVO_MOTIFS phrase (case-insensitive)
-        any(phrase.lower() in motif.lower() or motif.lower() in phrase.lower()  # NOTE(review): an empty string in motifs_found counts as a hit, since "" is a substring of every phrase
-            for phrase in DARVO_MOTIFS)
-        for motif in motifs_found
-    )
-    motif_score = motif_hits / max(len(DARVO_MOTIFS), 1)  # hits divided by the size of the motif list; max(..., 1) guards against division by zero
-    # Contradiction still binary
-    contradiction_score = 1.0 if contradiction_flag else 0.0
-    # Final DARVO score
-    return round(min(  # weights: 0.3 pattern count, 0.3 sentiment shift, 0.25 motif fraction, 0.15 contradiction
-        0.3 * pattern_hits +
-        0.3 * sentiment_shift_score +
-        0.25 * motif_score +
-        0.15 * contradiction_score, 1.0
-    ), 3)
 def detect_weapon_language(text):
     weapon_keywords = [
         "knife", "knives", "stab", "cut you", "cutting",
@@ -420,35 +354,6 @@ def generate_risk_snippet(abuse_score, top_label, escalation_score, stage):
     base += "🧠 You can review the pattern in context. This tool highlights possible dynamics—not judgments."
     return base
-    WHY_FLAGGED = {
-        "control": "This message may reflect efforts to restrict someone’s autonomy, even if it's framed as concern or care.",
-        "gaslighting": "This message could be manipulating someone into questioning their perception or feelings.",
-        "dismissiveness": "This message may include belittling, invalidating, or ignoring the other person’s experience.",
-        "insults": "Direct insults often appear in escalating abusive dynamics and can erode emotional safety.",
-        "blame shifting": "This message may redirect responsibility to avoid accountability, especially during conflict.",
-        "guilt tripping": "This message may induce guilt in order to control or manipulate behavior.",
-        "recovery phase": "This message may be part of a tension-reset cycle, appearing kind but avoiding change.",
-        "projection": "This message may involve attributing the abuser’s own behaviors to the victim.",
-        "contradictory statements": "This message may contain internal contradictions used to confuse, destabilize, or deflect responsibility.",
-        "obscure language": "This message may use overly formal, vague, or complex language to obscure meaning or avoid accountability.",
-        "default": "This message contains language patterns that may affect safety, clarity, or emotional autonomy."
-}
-    explanation = WHY_FLAGGED.get(pattern_label.lower(), WHY_FLAGGED["default"])
-    base = f"\n\n🛑 Risk Level: {risk_level.capitalize()}\n"
-    base += f"This message shows strong indicators of **{pattern_label}**. "
-    if risk_level == "high":
-        base += "The language may reflect patterns of emotional control, even when expressed in soft or caring terms.\n"
-    elif risk_level == "moderate":
-        base += "There are signs of emotional pressure or indirect control that may escalate if repeated.\n"
-    else:
-        base += "The message does not strongly indicate abuse, but it's important to monitor for patterns.\n"
-    base += f"\n💡 *Why this might be flagged:*\n{explanation}\n"
-    base += f"\nDetected Pattern: **{pattern_label} ({pattern_score})**\n"
-    base += "🧠 You can review the pattern in context. This tool highlights possible dynamics—not judgments."
-    return base
     # --- Step X: Detect Immediate Danger Threats ---
 THREAT_MOTIFS = [
@@ -535,23 +440,14 @@ def analyze_single_message(text, thresholds):
         k: v + 0.05 if sentiment == "supportive" else v
         for k, v in thresholds.items()
     }
-    contradiction_flag = detect_contradiction(text)
     threshold_labels = [
         label for label, score in zip(LABELS, scores)
         if score > adjusted_thresholds[label]
     ]
     tone_tag = get_emotional_tone_tag(emotion_profile, sentiment, threshold_labels, 0)
-    motifs = [phrase for _, phrase in matched_phrases]
-    darvo_score = calculate_darvo_score(
-        threshold_labels,
-        sentiment_before=0.0,
-        sentiment_after=sentiment_score,
-        motifs_found=motifs,
-        contradiction_flag=contradiction_flag
-    )
     top_patterns = sorted(
         [(label, score) for label, score in zip(LABELS, scores)],
@@ -621,8 +517,6 @@ def analyze_single_message(text, thresholds):
         print(f"  {label:25} → {score:.3f} {passed}")
     print(f"Matched for score: {[(l, round(s, 3)) for l, s, _ in matched_scores]}")
     print(f"Abuse Score Raw: {round(abuse_score_raw, 1)}")
-    print(f"Motifs: {motifs}")
-    print(f"Contradiction: {contradiction_flag}")
     print("------------------\n")
     return abuse_score, threshold_labels, top_patterns, {"label": sentiment}, stage, darvo_score, tone_tag

     ("Violence has increased in frequency or severity", 3),
     ("Partner monitors your calls/GPS/social media", 2)
 ]
 def get_emotional_tone_tag(emotions, sentiment, patterns, abuse_score):
     sadness = emotions.get("sadness", 0)
     joy = emotions.get("joy", 0)
         return "emotional instability"
     return None
+# 🔄 New DARVO score model (regression-based)
+from torch.nn.functional import sigmoid
+import torch
+# Load your trained DARVO regressor from Hugging Face Hub
+darvo_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-darvo-regressor-v1")  # executed at module import; AutoModelForSequenceClassification is not imported within this view — presumably imported earlier in app.py, verify
+darvo_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-darvo-regressor-v1", use_fast=False)  # same Hub repo as the model; AutoTokenizer likewise presumably imported earlier in app.py
+darvo_model.eval()  # switch the model to evaluation mode before it is used for inference
+def predict_darvo_score(text):  # Score text with the DARVO regressor: returns sigmoid of the model's logit as a Python float rounded to 4 decimals.
+    inputs = darvo_tokenizer(text, return_tensors="pt", truncation=True, padding=True)  # tokenized as PyTorch tensors with truncation enabled
+    with torch.no_grad():  # inference only; no gradients are tracked
+        logits = darvo_model(**inputs).logits
+        score = sigmoid(logits).item()  # .item() requires a single-element tensor — assumes one input text and a one-output head; TODO confirm against the model config
+    return round(score, 4)  # Rounded for display/output
 def detect_weapon_language(text):
     weapon_keywords = [
         "knife", "knives", "stab", "cut you", "cutting",
     base += "🧠 You can review the pattern in context. This tool highlights possible dynamics—not judgments."
     return base
     # --- Step X: Detect Immediate Danger Threats ---
 THREAT_MOTIFS = [
         k: v + 0.05 if sentiment == "supportive" else v
         for k, v in thresholds.items()
     }
+    darvo_score = predict_darvo_score(text)
     threshold_labels = [
         label for label, score in zip(LABELS, scores)
         if score > adjusted_thresholds[label]
     ]
     tone_tag = get_emotional_tone_tag(emotion_profile, sentiment, threshold_labels, 0)
     top_patterns = sorted(
         [(label, score) for label, score in zip(LABELS, scores)],
         print(f"  {label:25} → {score:.3f} {passed}")
     print(f"Matched for score: {[(l, round(s, 3)) for l, s, _ in matched_scores]}")
     print(f"Abuse Score Raw: {round(abuse_score_raw, 1)}")
     print("------------------\n")
     return abuse_score, threshold_labels, top_patterns, {"label": sentiment}, stage, darvo_score, tone_tag