SamanthaStorm committed on
Commit 9f9c218 · verified · 1 Parent(s): 012c587

Update app.py

Files changed (1)
  1. app.py (+42 −13)
app.py CHANGED
@@ -429,7 +429,7 @@ def analyze_single_message(text, thresholds):
         return 0.0, [], [], {"label": "none"}, 1, 0.0, None
 
     # Check for explicit abuse
-    explicit_abuse_words = ['fuck', 'bitch', 'shit', 'ass', 'dick']
+    explicit_abuse_words = ['fuck', 'bitch', 'shit', 'ass', 'dick', "make you regret it"]
     explicit_abuse = any(word in text.lower() for word in explicit_abuse_words)
     logger.debug(f"Explicit abuse detected: {explicit_abuse}")
 
@@ -446,6 +446,22 @@ def analyze_single_message(text, thresholds):
     for label, score in zip(LABELS, raw_scores):
         logger.debug(f"{label}: {score:.3f}")
 
+    # Initialize lists before checking control
+    threshold_labels = []
+    matched_scores = []
+
+    # Add control check
+    control_score = raw_scores[LABELS.index("control")]
+    if control_score > 0.3:  # Lower threshold for control
+        if "control" not in threshold_labels:
+            threshold_labels.append("control")
+            matched_scores.append(("control", control_score, PATTERN_WEIGHTS["control"]))
+
+    # Get predictions and sort them (continue with existing code)
+    predictions = list(zip(LABELS, raw_scores))
+    sorted_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)
+
+
     # Get predictions and sort them
     predictions = list(zip(LABELS, raw_scores))
     sorted_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)
@@ -455,19 +471,30 @@ def analyze_single_message(text, thresholds):
 
     # Apply thresholds
     threshold_labels = []
-    if explicit_abuse:
-        threshold_labels.append("insults")
-        logger.debug("\nForced inclusion of 'insults' due to explicit abuse")
-
-    for label, score in sorted_predictions:
-        base_threshold = thresholds.get(label, 0.25)
     if explicit_abuse:
-            base_threshold *= 0.5
-        if score > base_threshold:
-            if label not in threshold_labels:
-                threshold_labels.append(label)
-
-    logger.debug("\nLabels that passed thresholds:", threshold_labels)
+        threshold_labels.append("insults")
+        logger.debug("\nForced inclusion of 'insults' due to explicit abuse")
+
+    for label, score in sorted_predictions:
+        if label == "nonabusive":
+            continue  # Skip nonabusive label
+        base_threshold = thresholds.get(label, 0.25)
+        if explicit_abuse:
+            base_threshold *= 0.5
+        if score > base_threshold:
+            if label not in threshold_labels:
+                threshold_labels.append(label)
+
+    # Calculate matched scores (exclude nonabusive)
+    matched_scores = []
+    for label in threshold_labels:
+        if label == "nonabusive":
+            continue
+        score = raw_scores[LABELS.index(label)]
+        weight = PATTERN_WEIGHTS.get(label, 1.0)
+        if explicit_abuse and label == "insults":
+            weight *= 1.5
+        matched_scores.append((label, score, weight))
 
     # Calculate matched scores
     matched_scores = []
@@ -501,6 +528,8 @@ def analyze_single_message(text, thresholds):
         abuse_score = max(abuse_score, 70.0)
     if compound_threat_boost:
         abuse_score = max(abuse_score, 85.0)  # force high score if compound risk detected
+    if "control" in [label for label, _, _ in matched_scores]:
+        abuse_score = max(abuse_score, 70.0)  # Minimum score for control patterns
     # Get DARVO score
     darvo_score = predict_darvo_score(text)
 
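
For reference, the label-selection logic this commit introduces can be exercised on its own. The snippet below is a minimal sketch, not the code in app.py: the LABELS, PATTERN_WEIGHTS, thresholds and raw score values are placeholder assumptions, and the real analyze_single_message derives raw_scores from a classifier and computes abuse_score further down the function.

# Minimal sketch of the post-commit selection and weighting logic.
# All values below are illustrative assumptions, not those used in app.py.
LABELS = ["control", "insults", "nonabusive"]
PATTERN_WEIGHTS = {"control": 1.4, "insults": 1.2}
thresholds = {"control": 0.45, "insults": 0.30, "nonabusive": 0.25}

raw_scores = [0.38, 0.22, 0.61]   # hypothetical model outputs, aligned with LABELS
explicit_abuse = False            # True when an explicit_abuse_words phrase matches

threshold_labels = []
matched_scores = []

# "control" gets a fixed, lower cut-off (0.3) regardless of its configured threshold.
control_score = raw_scores[LABELS.index("control")]
if control_score > 0.3:
    threshold_labels.append("control")
    matched_scores.append(("control", control_score, PATTERN_WEIGHTS["control"]))

# Remaining labels pass through their configured thresholds; "nonabusive" is skipped,
# and explicit abuse both forces "insults" and halves every threshold.
sorted_predictions = sorted(zip(LABELS, raw_scores), key=lambda x: x[1], reverse=True)
if explicit_abuse:
    threshold_labels.append("insults")
for label, score in sorted_predictions:
    if label == "nonabusive":
        continue
    base_threshold = thresholds.get(label, 0.25)
    if explicit_abuse:
        base_threshold *= 0.5
    if score > base_threshold and label not in threshold_labels:
        threshold_labels.append(label)

print(threshold_labels)   # ['control'] for the scores above
print(matched_scores)     # [('control', 0.38, 1.4)]

With explicit_abuse set to True, every threshold is halved and "insults" is always included, which is the behaviour this commit carries over from the previous version of the function.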