Spaces:

SamanthaStorm
/

Tether

Running on Zero

App Files Files Community

SamanthaStorm committed 29 days ago

Commit

9c4d0f5

verified ·

1 Parent(s): 025be57

Update app.py

Browse files

Files changed (1) hide show

app.py +101 -2

app.py CHANGED Viewed

@@ -96,7 +96,7 @@ SENTIMENT_LABELS = ["undermining", "supportive"]
 THRESHOLDS = {
     "recovery phase": 0.324,
-    "control": 0.433,
     "gaslighting": 0.285,
     "guilt tripping": 0.267,
     "dismissiveness": 0.123,
@@ -110,7 +110,7 @@ THRESHOLDS = {
 PATTERN_WEIGHTS = {
-    "recovery": 0.7,
     "control": 1.4,
     "gaslighting": 1.3,
     "guilt tripping": 1.2,
@@ -328,6 +328,86 @@ def get_risk_stage(patterns, sentiment):
     except Exception as e:
         logger.error(f"Error determining risk stage: {e}")
         return 1
 @spaces.GPU
 def compute_abuse_score(matched_scores, sentiment):
@@ -453,6 +533,16 @@ def analyze_single_message(text, thresholds):
         if explicit_abuse:
             abuse_score = max(abuse_score, 70.0)
         # Get DARVO score
         darvo_score = predict_darvo_score(text)
@@ -810,6 +900,15 @@ def analyze_composite(msg1, msg2, msg3, *answers_and_none):
                 logger.debug("  ✓ No escalation factors")
         logger.debug(f"\n📊 Total Escalation Bump: +{escalation_bump}")
         # Combined Risk Calculation
         logger.debug("\n🎯 FINAL RISK CALCULATION")

 THRESHOLDS = {
     "recovery phase": 0.324,
+    "control": 0.33,
     "gaslighting": 0.285,
     "guilt tripping": 0.267,
     "dismissiveness": 0.123,
 PATTERN_WEIGHTS = {
+    "recovery phase": 0.7,
     "control": 1.4,
     "gaslighting": 1.3,
     "guilt tripping": 1.2,
     except Exception as e:
         logger.error(f"Error determining risk stage: {e}")
         return 1
def detect_threat_pattern(text, patterns):
    """Return True when a message looks threatening.

    A message is flagged when any of the following holds:

    * its text contains one of the threat words/phrases listed below,
    * its text contains the word "if" together with one of the
      conditional-threat cues ("regret", "make sure", "control"),
    * ``patterns`` contains one of the labels "control", "gaslighting",
      "blame shifting" or "insults".

    Matching is case-insensitive and anchored at the start of a word, so
    inflected forms ("regretted", "controlling") still match while
    look-alikes buried inside other words ("never" in "whenever", "lose"
    in "close", "if" in "life") do not.

    Args:
        text: The message text. ``None`` is treated as an empty message.
        patterns: Iterable of pattern labels already detected for the
            message. ``None`` is treated as "no patterns".

    Returns:
        bool: True if any of the three checks fires, otherwise False.
    """
    # Imported locally so the function does not rely on a module-level import.
    import re

    # Threat indicators in text
    threat_words = (
        "regret", "sorry", "pay", "hurt", "suffer", "destroy", "ruin",
        "expose", "tell everyone", "never see", "take away", "lose",
        "control", "make sure", "won't let", "force", "warn", "never",
        "punish", "teach you", "learn", "show you", "remember",
        # Specific controlling phrases
        "if you", "don't forget", "i control", "i'll make sure",
        # Financial control indicators
        "bank account", "phone", "money", "access",
    )
    conditional_cues = ("regret", "make sure", "control")
    threat_patterns = {"control", "gaslighting", "blame shifting", "insults"}

    # Normalise once: lower-case, and map the typographic apostrophe to the
    # ASCII one so phrases such as "won't let" match text typed on a phone.
    text_lower = (text or "").lower().replace("\u2019", "'")

    def contains_any(phrases):
        # One alternation anchored at a word start: a single scan of the text
        # that cannot match in the middle of an unrelated word.
        alternation = "|".join(re.escape(phrase) for phrase in phrases)
        return re.search(rf"\b(?:{alternation})", text_lower) is not None

    has_threat_words = contains_any(threat_words)

    # Check for conditional threats (if/then structures). "if" must be a whole
    # word. Every cue is also listed in threat_words, so this check cannot
    # fire on its own today; it is kept explicit to document the rule.
    conditional_threat = (
        re.search(r"\bif\b", text_lower) is not None
        and contains_any(conditional_cues)
    )

    # Check for threat patterns among the labels already detected
    has_threat_patterns = any(p in threat_patterns for p in (patterns or ()))

    return has_threat_words or has_threat_patterns or conditional_threat
def detect_compound_threat(text, patterns):
    """Detect a compound threat inside a single message.

    Two rules are evaluated:

    * Rule A - the message is a threat according to
      ``detect_threat_pattern`` and at least two of its pattern labels are
      high risk ("control", "gaslighting", "blame shifting", "insults").
    * Control + conditional threat - the labels include "control" and the
      text contains the word "if" together with "regret", "make sure" or
      "control".

    Args:
        text: The message text. ``None`` is treated as an empty message.
        patterns: Iterable of pattern labels detected for the message.
            ``None`` is treated as "no patterns".

    Returns:
        tuple: ``(True, "single_message")`` when either rule fires,
        otherwise ``(False, None)``. Any exception raised during detection
        is logged and also reported as ``(False, None)``.
    """
    # Imported locally so the function does not rely on a module-level import.
    import re

    try:
        message = text or ""
        labels = list(patterns) if patterns else []
        text_lower = message.lower()  # lower-case once, reuse for every check

        # Rule A: Single Message Multiple Patterns
        high_risk_patterns = {"control", "gaslighting", "blame shifting", "insults"}
        high_risk_count = sum(1 for p in labels if p in high_risk_patterns)
        # NOTE(review): detect_threat_pattern returns True whenever the labels
        # contain any of these same high-risk labels, so Rule A effectively
        # reduces to "two or more high-risk labels" - confirm that is intended.
        has_threat = detect_threat_pattern(message, labels)

        # Special case for control + threats. "if" has to be a whole word and
        # the cues have to start a word, so "life" or "uncontrolled" do not
        # count as a conditional threat.
        has_control = "control" in labels
        has_conditional_threat = (
            re.search(r"\bif\b", text_lower) is not None
            and re.search(r"\b(?:regret|make sure|control)", text_lower) is not None
        )

        # Single message compound threat
        if (has_threat and high_risk_count >= 2) or (has_control and has_conditional_threat):
            return True, "single_message"
        return False, None
    except Exception as e:
        # Best-effort detector: a failure here must not break message analysis.
        logger.error(f"Error in compound threat detection: {e}")
        return False, None
def analyze_message_batch_threats(messages, results):
    """Analyze multiple messages for compound threats.

    Each message is paired with the entry at the same position in
    ``results``. Blank, whitespace-only and ``None`` messages are skipped.

    Args:
        messages: Sequence of message texts.
        results: Sequence of 2-item entries ``(result, extra)``; ``result[1]``
            is read as the iterable of pattern labels detected for the
            message and ``extra`` is ignored.
            NOTE(review): assumes ``results`` is index-aligned with
            ``messages`` (one entry per message, including blank ones) -
            verify against the caller, because ``zip`` silently truncates or
            mis-pairs otherwise.

    Returns:
        tuple: ``(True, "multiple_threats")`` when at least two messages are
        threats, ``(True, "threat_with_support")`` when exactly one message
        is a threat and at least two messages carry a supporting pattern,
        otherwise ``(False, None)``.
    """
    supporting_patterns = {"control", "gaslighting", "blame shifting"}
    threat_count = 0
    support_count = 0

    for msg, (result, _) in zip(messages, results):
        # Skip empty messages; None is treated like an empty string instead
        # of raising AttributeError on .strip().
        if not msg or not msg.strip():
            continue

        patterns = result[1]  # Get detected patterns

        # Check for threat in this message
        if detect_threat_pattern(msg, patterns):
            threat_count += 1

        # Check for supporting patterns
        if any(p in supporting_patterns for p in patterns):
            support_count += 1

    # Rule B: Multi-Message Accumulation
    if threat_count >= 2:
        return True, "multiple_threats"
    # NOTE(review): detect_threat_pattern already flags any message whose
    # labels include a supporting pattern, so every supporting message is also
    # counted as a threat and this branch cannot currently fire - confirm the
    # intended rule.
    if threat_count == 1 and support_count >= 2:
        return True, "threat_with_support"
    return False, None
 @spaces.GPU
 def compute_abuse_score(matched_scores, sentiment):
         if explicit_abuse:
             abuse_score = max(abuse_score, 70.0)
+        # Check for compound threats
+        compound_threat_flag, threat_type = detect_compound_threat(
+        text, threshold_labels
+    )
+        if compound_threat_flag:
+            logger.debug(f"⚠️ Compound threat detected in message: {threat_type}")
+            abuse_score = max(abuse_score, 85.0)  # Force high score for compound threats
         # Get DARVO score
         darvo_score = predict_darvo_score(text)
                 logger.debug("  ✓ No escalation factors")
         logger.debug(f"\n📊 Total Escalation Bump: +{escalation_bump}")
+        # Check for compound threats across messages
+        compound_threat_flag, threat_type = analyze_message_batch_threats(
+            [msg1, msg2, msg3], results
+        )
+        if compound_threat_flag:
+            logger.debug(f"⚠️ Compound threat detected across messages: {threat_type}")
+            pattern_escalation_risk = "Critical"  # Override risk level
+            logger.debug("Risk level elevated to CRITICAL due to compound threats")
         # Combined Risk Calculation
         logger.debug("\n🎯 FINAL RISK CALCULATION")