Update app.py
app.py CHANGED
@@ -429,7 +429,7 @@ def analyze_single_message(text, thresholds):
         return 0.0, [], [], {"label": "none"}, 1, 0.0, None
 
     # Check for explicit abuse
-    explicit_abuse_words = ['fuck', 'bitch', 'shit', 'ass', 'dick']
+    explicit_abuse_words = ['fuck', 'bitch', 'shit', 'ass', 'dick', "make you regret it"]
     explicit_abuse = any(word in text.lower() for word in explicit_abuse_words)
     logger.debug(f"Explicit abuse detected: {explicit_abuse}")
 
@@ -446,6 +446,22 @@ def analyze_single_message(text, thresholds):
     for label, score in zip(LABELS, raw_scores):
         logger.debug(f"{label}: {score:.3f}")
 
+    # Initialize lists before checking control
+    threshold_labels = []
+    matched_scores = []
+
+    # Add control check
+    control_score = raw_scores[LABELS.index("control")]
+    if control_score > 0.3:  # Lower threshold for control
+        if "control" not in threshold_labels:
+            threshold_labels.append("control")
+            matched_scores.append(("control", control_score, PATTERN_WEIGHTS["control"]))
+
+    # Get predictions and sort them (continue with existing code)
+    predictions = list(zip(LABELS, raw_scores))
+    sorted_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)
+
+
     # Get predictions and sort them
     predictions = list(zip(LABELS, raw_scores))
     sorted_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)
@@ -455,19 +471,30 @@ def analyze_single_message(text, thresholds):
 
     # Apply thresholds
     threshold_labels = []
-    if explicit_abuse:
-        threshold_labels.append("insults")
-        logger.debug("\nForced inclusion of 'insults' due to explicit abuse")
-
-    for label, score in sorted_predictions:
-        base_threshold = thresholds.get(label, 0.25)
     if explicit_abuse:
-
-
-
-
-
-
+        threshold_labels.append("insults")
+        logger.debug("\nForced inclusion of 'insults' due to explicit abuse")
+
+    for label, score in sorted_predictions:
+        if label == "nonabusive":
+            continue  # Skip nonabusive label
+        base_threshold = thresholds.get(label, 0.25)
+        if explicit_abuse:
+            base_threshold *= 0.5
+        if score > base_threshold:
+            if label not in threshold_labels:
+                threshold_labels.append(label)
+
+    # Calculate matched scores (exclude nonabusive)
+    matched_scores = []
+    for label in threshold_labels:
+        if label == "nonabusive":
+            continue
+        score = raw_scores[LABELS.index(label)]
+        weight = PATTERN_WEIGHTS.get(label, 1.0)
+        if explicit_abuse and label == "insults":
+            weight *= 1.5
+        matched_scores.append((label, score, weight))
 
     # Calculate matched scores
     matched_scores = []
@@ -501,6 +528,8 @@ def analyze_single_message(text, thresholds):
         abuse_score = max(abuse_score, 70.0)
     if compound_threat_boost:
         abuse_score = max(abuse_score, 85.0)  # force high score if compound risk detected
+    if "control" in [label for label, _, _ in matched_scores]:
+        abuse_score = max(abuse_score, 70.0)  # Minimum score for control patterns
     # Get DARVO score
     darvo_score = predict_darvo_score(text)
 
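Since this commit adds a multi-word phrase to explicit_abuse_words, note that the check is plain substring containment on the lowercased text, so phrases are matched the same way single words are. A minimal sketch of that check, using an abbreviated, illustrative word list rather than the full list from app.py:

# Sketch of the explicit-abuse check; the word list here is abbreviated and
# illustrative, not the exact list defined in app.py.
explicit_abuse_words = ['bitch', 'make you regret it']

def has_explicit_abuse(text: str) -> bool:
    # Substring containment on the lowercased text, so multi-word phrases
    # are detected exactly like single words.
    return any(word in text.lower() for word in explicit_abuse_words)

print(has_explicit_abuse("I'll make you regret it."))  # True
print(has_explicit_abuse("See you tomorrow."))         # False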
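The core of the change is the reworked threshold-and-weighting pass plus the score floor for control patterns. Below is a minimal, self-contained sketch of that logic; LABELS, PATTERN_WEIGHTS, thresholds, raw_scores, and the starting abuse_score are illustrative assumptions, not the values defined elsewhere in app.py:

# Minimal sketch of the threshold application, score weighting, and control
# floor introduced by this commit. All values below are illustrative
# placeholders, not the real configuration from app.py.
LABELS = ["control", "insults", "threats", "nonabusive"]
PATTERN_WEIGHTS = {"control": 1.2, "insults": 1.0, "threats": 1.5}
thresholds = {"control": 0.25, "insults": 0.3, "threats": 0.35}

raw_scores = [0.42, 0.28, 0.10, 0.55]  # assumed model output, one score per label
explicit_abuse = True                  # assumed result of the word check above

sorted_predictions = sorted(zip(LABELS, raw_scores), key=lambda x: x[1], reverse=True)

threshold_labels = []
if explicit_abuse:
    threshold_labels.append("insults")  # forced inclusion, as in the diff

for label, score in sorted_predictions:
    if label == "nonabusive":
        continue                         # never treat the benign label as a match
    base_threshold = thresholds.get(label, 0.25)
    if explicit_abuse:
        base_threshold *= 0.5            # halve thresholds when explicit abuse is present
    if score > base_threshold and label not in threshold_labels:
        threshold_labels.append(label)

matched_scores = []
for label in threshold_labels:
    score = raw_scores[LABELS.index(label)]
    weight = PATTERN_WEIGHTS.get(label, 1.0)
    if explicit_abuse and label == "insults":
        weight *= 1.5                    # boost insults when explicit abuse is present
    matched_scores.append((label, score, weight))

abuse_score = 40.0                       # assumed score from the earlier stages
if "control" in [label for label, _, _ in matched_scores]:
    abuse_score = max(abuse_score, 70.0) # floor applied when control patterns matched

print(threshold_labels)  # ['insults', 'control']
print(matched_scores)    # [('insults', 0.28, 1.5), ('control', 0.42, 1.2)]
print(abuse_score)       # 70.0

With these assumed inputs, "insults" is force-included by the explicit-abuse flag, "control" passes its halved threshold, and the presence of a control match floors the final score at 70.0.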