Update app.py
app.py CHANGED
@@ -429,7 +429,7 @@ def analyze_single_message(text, thresholds):
         return 0.0, [], [], {"label": "none"}, 1, 0.0, None
 
     # Check for explicit abuse
-    explicit_abuse_words = ['fuck', 'bitch', 'shit', 'ass', 'dick']
+    explicit_abuse_words = ['fuck', 'bitch', 'shit', 'ass', 'dick', "make you regret it"]
     explicit_abuse = any(word in text.lower() for word in explicit_abuse_words)
     logger.debug(f"Explicit abuse detected: {explicit_abuse}")
 
@@ -446,6 +446,22 @@ def analyze_single_message(text, thresholds):
     for label, score in zip(LABELS, raw_scores):
         logger.debug(f"{label}: {score:.3f}")
 
+    # Initialize lists before checking control
+    threshold_labels = []
+    matched_scores = []
+
+    # Add control check
+    control_score = raw_scores[LABELS.index("control")]
+    if control_score > 0.3:  # Lower threshold for control
+        if "control" not in threshold_labels:
+            threshold_labels.append("control")
+            matched_scores.append(("control", control_score, PATTERN_WEIGHTS["control"]))
+
+    # Get predictions and sort them (continue with existing code)
+    predictions = list(zip(LABELS, raw_scores))
+    sorted_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)
+
+
     # Get predictions and sort them
     predictions = list(zip(LABELS, raw_scores))
     sorted_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)
@@ -455,19 +471,30 @@ def analyze_single_message(text, thresholds):
 
     # Apply thresholds
     threshold_labels = []
-    if explicit_abuse:
-        threshold_labels.append("insults")
-        logger.debug("\nForced inclusion of 'insults' due to explicit abuse")
-
-    for label, score in sorted_predictions:
-        base_threshold = thresholds.get(label, 0.25)
     if explicit_abuse:
-
-
-
-
-
-
+        threshold_labels.append("insults")
+        logger.debug("\nForced inclusion of 'insults' due to explicit abuse")
+
+    for label, score in sorted_predictions:
+        if label == "nonabusive":
+            continue  # Skip nonabusive label
+        base_threshold = thresholds.get(label, 0.25)
+        if explicit_abuse:
+            base_threshold *= 0.5
+        if score > base_threshold:
+            if label not in threshold_labels:
+                threshold_labels.append(label)
+
+    # Calculate matched scores (exclude nonabusive)
+    matched_scores = []
+    for label in threshold_labels:
+        if label == "nonabusive":
+            continue
+        score = raw_scores[LABELS.index(label)]
+        weight = PATTERN_WEIGHTS.get(label, 1.0)
+        if explicit_abuse and label == "insults":
+            weight *= 1.5
+        matched_scores.append((label, score, weight))
 
     # Calculate matched scores
     matched_scores = []
@@ -501,6 +528,8 @@ def analyze_single_message(text, thresholds):
         abuse_score = max(abuse_score, 70.0)
     if compound_threat_boost:
         abuse_score = max(abuse_score, 85.0)  # force high score if compound risk detected
+    if "control" in [label for label, _, _ in matched_scores]:
+        abuse_score = max(abuse_score, 70.0)  # Minimum score for control patterns
     # Get DARVO score
     darvo_score = predict_darvo_score(text)
 
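Since this commit adds a multi-word phrase to explicit_abuse_words, note that the check is plain substring containment on the lowercased text, so phrases are matched the same way single words are. A minimal sketch of that check, using an abbreviated, illustrative word list rather than the full list from app.py:

# Sketch of the explicit-abuse check; the word list here is abbreviated and
# illustrative, not the exact list defined in app.py.
explicit_abuse_words = ['bitch', 'make you regret it']

def has_explicit_abuse(text: str) -> bool:
    # Substring containment on the lowercased text, so multi-word phrases
    # are detected exactly like single words.
    return any(word in text.lower() for word in explicit_abuse_words)

print(has_explicit_abuse("I'll make you regret it."))  # True
print(has_explicit_abuse("See you tomorrow."))         # False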
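The core of the change is the reworked threshold-and-weighting pass plus the score floor for control patterns. Below is a minimal, self-contained sketch of that logic; LABELS, PATTERN_WEIGHTS, thresholds, raw_scores, and the starting abuse_score are illustrative assumptions, not the values defined elsewhere in app.py:

# Minimal sketch of the threshold application, score weighting, and control
# floor introduced by this commit. All values below are illustrative
# placeholders, not the real configuration from app.py.
LABELS = ["control", "insults", "threats", "nonabusive"]
PATTERN_WEIGHTS = {"control": 1.2, "insults": 1.0, "threats": 1.5}
thresholds = {"control": 0.25, "insults": 0.3, "threats": 0.35}

raw_scores = [0.42, 0.28, 0.10, 0.55]  # assumed model output, one score per label
explicit_abuse = True                  # assumed result of the word check above

sorted_predictions = sorted(zip(LABELS, raw_scores), key=lambda x: x[1], reverse=True)

threshold_labels = []
if explicit_abuse:
    threshold_labels.append("insults")  # forced inclusion, as in the diff

for label, score in sorted_predictions:
    if label == "nonabusive":
        continue                         # never treat the benign label as a match
    base_threshold = thresholds.get(label, 0.25)
    if explicit_abuse:
        base_threshold *= 0.5            # halve thresholds when explicit abuse is present
    if score > base_threshold and label not in threshold_labels:
        threshold_labels.append(label)

matched_scores = []
for label in threshold_labels:
    score = raw_scores[LABELS.index(label)]
    weight = PATTERN_WEIGHTS.get(label, 1.0)
    if explicit_abuse and label == "insults":
        weight *= 1.5                    # boost insults when explicit abuse is present
    matched_scores.append((label, score, weight))

abuse_score = 40.0                       # assumed score from the earlier stages
if "control" in [label for label, _, _ in matched_scores]:
    abuse_score = max(abuse_score, 70.0) # floor applied when control patterns matched

print(threshold_labels)  # ['insults', 'control']
print(matched_scores)    # [('insults', 0.28, 1.5), ('control', 0.42, 1.2)]
print(abuse_score)       # 70.0

With these assumed inputs, "insults" is force-included by the explicit-abuse flag, "control" passes its halved threshold, and the presence of a control match floors the final score at 70.0.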