SamanthaStorm committed on
Commit
68ee468
·
verified ·
1 Parent(s): e345a71

Update app.py

Files changed (1)
  1. app.py +561 -734
app.py CHANGED
@@ -1,9 +1,11 @@
 import gradio as gr
- import spaces
 import torch
 import numpy as np
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
- from motif_tagging import detect_motifs
 import re
 import matplotlib.pyplot as plt
 import io
@@ -11,18 +13,71 @@ from PIL import Image
 from datetime import datetime
 from torch.nn.functional import sigmoid
 from collections import Counter

- # ─── Abuse Model ─────────────────────────────────
- model_name = "SamanthaStorm/tether-multilabel-v4"
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
- tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
-
 LABELS = [
- "recovery", "control", "gaslighting", "guilt tripping", "dismissiveness", "blame shifting",
- "nonabusive","projection", "insults", "contradictory statements", "obscure language"
 ]

 THRESHOLDS = {
 "recovery": 0.4,
 "control": 0.45,
@@ -64,767 +119,539 @@ ESCALATION_QUESTIONS = [
 ("Partner monitors your calls/GPS/social media", 2)
 ]

- # ─── Escalation Risk Mapping ────────────────────
- ESCALATION_QUESTIONS = [
- ("Partner has access to firearms or weapons", 4),
- ("Partner threatened to kill you", 3),
- ("Partner threatened you with a weapon", 3),
- ("Partner has ever choked you, even if you considered it consensual at the time", 4),
- ("Partner injured or threatened your pet(s)", 3),
- ("Partner has broken your things, punched or kicked walls, or thrown things ", 2),
- ("Partner forced or coerced you into unwanted sexual acts", 3),
- ("Partner threatened to take away your children", 2),
- ("Violence has increased in frequency or severity", 3),
- ("Partner monitors your calls/GPS/social media", 2)
- ]
-
- # ─── Escalation Risk Mapping ────────────────────
- ESCALATION_RISKS = {
- "blame shifting": "low",
- "contradictory statements": "moderate",
- "control": "high",
- "dismissiveness": "moderate",
- "gaslighting": "moderate",
- "guilt tripping": "moderate",
- "insults": "moderate",
- "obscure language": "low",
- "projection": "low",
- "recovery phase": "low"
- }
-
- # ─── Risk Stage Labels ──────────────────────────
- # ─── Risk Stage Labels ──────────────────────────
 RISK_STAGE_LABELS = {
- 1: "🌀 Risk Stage: Tension-Building\n"
- "This message reflects rising emotional pressure or subtle control attempts.",
- 2: "🔥 Risk Stage: Escalation\n"
- "This message includes direct or aggressive patterns, suggesting active harm.",
- 3: "🌧️ Risk Stage: Reconciliation\n"
- "This message reflects a reset attempt—apologies or emotional repair without accountability.",
- 4: "🌸 Risk Stage: Calm / Honeymoon\n"
- "This message appears supportive but may follow prior harm, minimizing it."
 }

- # ─── Immediate Threat Motifs ────────────────────
 THREAT_MOTIFS = [
- "i'll kill you", "i’m going to hurt you", "you’re dead", "you won't survive this",
- "i’ll break your face", "i'll bash your head in", "i’ll snap your neck",
- "i’ll come over there and make you shut up", "i'll knock your teeth out",
- "you’re going to bleed", "you want me to hit you?", "i won’t hold back next time",
- "i swear to god i’ll beat you", "next time, i won’t miss", "i’ll make you scream",
- "i know where you live", "i'm outside", "i’ll be waiting", "i saw you with him",
- "you can’t hide from me", "i’m coming to get you", "i'll find you", "i know your schedule",
- "i watched you leave", "i followed you home", "you'll regret this", "you’ll be sorry",
- "you’re going to wish you hadn’t", "you brought this on yourself", "don’t push me",
- "you have no idea what i’m capable of", "you better watch yourself",
- "i don’t care what happens to you anymore", "i’ll make you suffer", "you’ll pay for this",
- "i’ll never let you go", "you’re nothing without me", "if you leave me, i’ll kill myself",
- "i'll ruin you", "i'll tell everyone what you did", "i’ll make sure everyone knows",
- "i’m going to destroy your name", "you’ll lose everyone", "i’ll expose you",
- "your friends will hate you", "i’ll post everything", "you’ll be cancelled",
- "you’ll lose everything", "i’ll take the house", "i’ll drain your account",
- "you’ll never see a dime", "you’ll be broke when i’m done", "i’ll make sure you lose your job",
- "i’ll take your kids", "i’ll make sure you have nothing", "you can’t afford to leave me",
- "don't make me do this", "you know what happens when i’m mad", "you’re forcing my hand",
- "if you just behaved, this wouldn’t happen", "this is your fault",
- "you’re making me hurt you", "i warned you", "you should have listened"
 ]

-
- # New Tone & Sentiment Models
- tone_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tone-tag-multilabel-v1", use_fast=False)
- tone_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tone-tag-multilabel-v1")
- TONE_LABELS = [
- "cold invalidation", "coercive warmth", "contradictory gaslight",
- "deflective hostility", "emotional instability", "nonabusive",
- "performative regret", "emotional threat", "forced accountability flip"
- ]
-
- sentiment_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-sentiment", use_fast=False)
- sentiment_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-sentiment")
- SENTIMENT_LABELS = ["undermining", "supportive"]
-
-
- # ─── DARVO Model ────────────────────────────────
- darvo_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-darvo-regressor-v1")
- darvo_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-darvo-regressor-v1", use_fast=False)
- darvo_model.eval()
-
 def predict_darvo_score(text):
- inputs = darvo_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
- with torch.no_grad():
- logits = darvo_model(**inputs).logits
- return round(sigmoid(logits).item(), 4)

 def detect_weapon_language(text):
- weapon_keywords = ["knife","gun","bomb","weapon","kill","stab"]
 t = text.lower()
 return any(w in t for w in weapon_keywords)
 
- # ─── Updated Risk Stage Logic ───────────────────
- RISK_STAGE_LABELS = {
- 1: "🌀 Risk Stage: Tension-Building\nThis message reflects rising emotional pressure or subtle control attempts.",
- 2: "🔥 Risk Stage: Escalation\nThis message includes direct or aggressive patterns, suggesting active harm.",
- 3: "🌧️ Risk Stage: Reconciliation\nThis message reflects a reset attempt—apologies or emotional repair without accountability.",
- 4: "🌸 Risk Stage: Calm / Honeymoon\nThis message appears supportive but may follow prior harm, minimizing it."
- }
-
 def get_risk_stage(patterns, sentiment):
- if "insults" in patterns:
- return 2
- elif "recovery" in patterns:
- return 3
- elif "control" in patterns or "guilt tripping" in patterns:
 return 1
- elif sentiment == "supportive" and any(p in patterns for p in ["projection", "dismissiveness"]):
- return 4
- return 1
-
-
- # ─── Emotion & Tone Removed (unneeded) ──────────
- # (Emotion model block removed)

- # ─── Replace get_emotional_tone_tag ─────────────
 def get_emotional_tone_tag(text, emotions, sentiment, patterns, abuse_score):
- inputs = tone_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
- with torch.no_grad():
- logits = tone_model(**inputs).logits[0]
- probs = torch.sigmoid(logits).cpu().numpy()
- scores = dict(zip(TONE_LABELS, np.round(probs, 3)))
- return max(scores, key=scores.get)
-
 
- @spaces.GPU
 def compute_abuse_score(matched_scores, sentiment):
- """
- Compute abuse score from matched patterns and sentiment
- """
- if not matched_scores:
- return 0.0
-
- # Calculate weighted score
- total_weight = sum(weight for _, _, weight in matched_scores)
- if total_weight == 0:
- return 0.0
-
- # Get highest pattern scores
- pattern_scores = [(label, score) for label, score, _ in matched_scores]
- sorted_scores = sorted(pattern_scores, key=lambda x: x[1], reverse=True)
-
- # Base score calculation
- weighted_sum = sum(score * weight for _, score, weight in matched_scores)
- base_score = (weighted_sum / total_weight) * 100
-
- # Pattern combination multipliers
- if len(matched_scores) >= 3: # Multiple patterns detected
- base_score *= 1.2 # 20% increase for pattern combinations
-
- # High severity patterns
- high_severity_patterns = {'gaslighting', 'control', 'blame shifting'}
- if any(label in high_severity_patterns for label, _, _ in matched_scores):
- base_score *= 1.15 # 15% increase for high severity patterns
-
- # Pattern strength boosters
- if any(score > 0.6 for _, score, _ in matched_scores): # Any pattern > 60%
- base_score *= 1.1 # 10% increase for strong patterns
-
- # Multiple high scores
- high_scores = len([score for _, score, _ in matched_scores if score > 0.5])
- if high_scores >= 2:
- base_score *= 1.15 # 15% increase for multiple high scores
-
- # Apply sentiment modifier
- if sentiment == "supportive":
- # Less reduction for supportive sentiment when high severity patterns present
 if any(label in high_severity_patterns for label, _, _ in matched_scores):
- base_score *= 0.9 # Only 10% reduction
- else:
- base_score *= 0.85 # Normal 15% reduction
- elif sentiment == "undermining":
- base_score *= 1.15 # 15% increase for undermining sentiment
-
- # Ensure minimum score for strong patterns
- if any(score > 0.6 for _, score, _ in matched_scores):
- base_score = max(base_score, 65.0)
-
- # Cap maximum score
- return min(round(base_score, 1), 100.0)
-
 
 def analyze_single_message(text, thresholds):
- print("\n=== DEBUG START ===")
- print(f"Input text: {text}")
-
- if not text.strip():
- print("Empty text, returning zeros")
- return 0.0, [], [], {"label": "none"}, 1, 0.0, None
-
- # Check for explicit abuse
- explicit_abuse_words = ['fuck', 'bitch', 'shit', 'ass', 'dick']
- explicit_abuse = any(word in text.lower() for word in explicit_abuse_words)
- print(f"Explicit abuse detected: {explicit_abuse}")
-
- # Abuse model inference
- inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
- with torch.no_grad():
- outputs = model(**inputs)
- raw_scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
-
- # Print raw model outputs
- print("\nRaw model scores:")
- for label, score in zip(LABELS, raw_scores):
- print(f"{label}: {score:.3f}")
-
- # Get predictions and sort them
- predictions = list(zip(LABELS, raw_scores))
- sorted_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)
- print("\nTop 3 predictions:")
- for label, score in sorted_predictions[:3]:
- print(f"{label}: {score:.3f}")
-
- # Apply thresholds
- threshold_labels = []
- if explicit_abuse:
- threshold_labels.append("insults")
- print("\nForced inclusion of 'insults' due to explicit abuse")
-
- for label, score in sorted_predictions:
- base_threshold = thresholds.get(label, 0.25)
 if explicit_abuse:
- base_threshold *= 0.5
- if score > base_threshold:
- if label not in threshold_labels: # Avoid duplicates
- threshold_labels.append(label)
-
- print("\nLabels that passed thresholds:", threshold_labels)
-
- # Calculate matched scores
- matched_scores = []
- for label in threshold_labels:
- score = raw_scores[LABELS.index(label)]
- weight = PATTERN_WEIGHTS.get(label, 1.0)
- if explicit_abuse and label == "insults":
- weight *= 1.5
- matched_scores.append((label, score, weight))
-
- print("\nMatched scores (label, score, weight):", matched_scores)
-
- # Calculate abuse score
- if not matched_scores:
- print("No matched scores, returning 0")
- return 0.0, [], [], {"label": "undermining"}, 2 if explicit_abuse else 1, 0.0, None
-
- weighted_sum = sum(score * weight for _, score, weight in matched_scores)
- total_weight = sum(weight for _, _, weight in matched_scores)
- abuse_score = (weighted_sum / total_weight) * 100
-
- if explicit_abuse:
- abuse_score = max(abuse_score, 70.0)
-
- print(f"\nCalculated abuse score: {abuse_score}")
-
- # Get sentiment
- sent_inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
- with torch.no_grad():
- sent_logits = sentiment_model(**sent_inputs).logits[0]
- sent_probs = torch.softmax(sent_logits, dim=-1).cpu().numpy()
- sentiment = SENTIMENT_LABELS[int(np.argmax(sent_probs))]
- print(f"\nDetected sentiment: {sentiment}")
-
- # Get tone
- tone_inputs = tone_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
- with torch.no_grad():
- tone_logits = tone_model(**tone_inputs).logits[0]
- tone_probs = torch.sigmoid(tone_logits).cpu().numpy()
- tone_tag = TONE_LABELS[int(np.argmax(tone_probs))]
- print(f"Detected tone: {tone_tag}")
-
- # Get DARVO score
- darvo_score = predict_darvo_score(text)
- print(f"DARVO score: {darvo_score}")
-
- # Set stage
- stage = 2 if explicit_abuse or abuse_score > 70 else 1
- print(f"Final stage: {stage}")
-
- print("=== DEBUG END ===\n")
-
- return abuse_score, threshold_labels, matched_scores, {"label": sentiment}, stage, darvo_score, tone_tag

 
- def generate_risk_snippet(abuse_score, patterns, hybrid_score, stage):
- """
- Enhanced risk assessment generator with more nuanced scoring and pattern analysis
-
- Parameters:
- - abuse_score: float (0-100)
- - patterns: list of detected abuse patterns
- - hybrid_score: float (combined escalation/risk score)
- - stage: int (1-4 representing relationship stages)
- """
-
- # Define risk thresholds with more granular levels
- def get_risk_level(abuse_score, hybrid_score, patterns):
- if abuse_score >= 85 or hybrid_score >= 20:
- return "Critical"
- elif abuse_score >= 70 or hybrid_score >= 15:
- return "High"
- elif abuse_score >= 50 or hybrid_score >= 10:
- return "Moderate"
- return "Low"
-
- # Pattern severity weights
- PATTERN_SEVERITY = {
- "control": 3,
- "gaslighting": 3,
- "insults": 2,
- "blame shifting": 2,
- "guilt tripping": 2,
- "dismissiveness": 1,
- "projection": 1,
- "contradictory statements": 1,
- "obscure language": 1,
- "recovery": 1
- }
-
- # Calculate weighted pattern severity
- pattern_severity = sum(PATTERN_SEVERITY.get(p, 0) for p in patterns)
-
- # Get base risk level
- risk_level = get_risk_level(abuse_score, hybrid_score, patterns)
-
- # Generate risk descriptions with more detailed context
- risk_descriptions = {
- "Critical": (
- "🚨 **Risk Level: Critical**\n"
- f"Multiple severe abuse patterns detected (Score: {abuse_score:.1f}%). "
- "This situation shows signs of dangerous escalation and immediate intervention may be needed."
- ),
- "High": (
- "⚠️ **Risk Level: High**\n"
- f"Strong abuse patterns detected (Score: {abuse_score:.1f}%). "
- "This situation shows concerning signs of manipulation and control."
- ),
- "Moderate": (
- "⚡ **Risk Level: Moderate**\n"
- f"Concerning patterns detected (Score: {abuse_score:.1f}%). "
- "While not severe, these behaviors indicate unhealthy relationship dynamics."
- ),
- "Low": (
- "📝 **Risk Level: Low**\n"
- f"Minor concerning patterns detected (Score: {abuse_score:.1f}%). "
- "While present, the detected behaviors are subtle or infrequent."
- )
- }
-
- # Add stage-specific context
- stage_context = {
- 1: "Current patterns suggest a tension-building phase.",
- 2: "Messages show signs of active escalation.",
- 3: "Patterns indicate attempted reconciliation without real change.",
- 4: "Surface calm may mask underlying issues."
- }
-
- # Build output
- output = risk_descriptions[risk_level]
- if stage in stage_context:
- output += f"\n{stage_context[stage]}"
-
- # Add pattern analysis if patterns detected
- if patterns:
- output += "\n\n🔍 **Detected Patterns:**"
- for pattern in patterns:
- severity = PATTERN_SEVERITY.get(pattern, 0)
- output += f"\n• {pattern.title()} (Severity: {'❗' * severity})"
-
- # Add safety recommendations based on risk level
- if risk_level in ["Critical", "High"]:
- output += "\n\n⚠️ **Safety Recommendations:**"
- output += "\n• Consider reaching out to a domestic violence hotline"
- output += "\n• Document all concerning interactions"
- output += "\n• Have a safety plan in place"
-
- return output

 
 def generate_abuse_score_chart(dates, scores, patterns):
- """
- Generate a timeline chart of abuse scores
- """
- plt.figure(figsize=(10, 6))
- plt.clf()
-
- # Create new figure
- fig, ax = plt.subplots(figsize=(10, 6))
-
- # Plot points and lines
- x = range(len(scores))
- plt.plot(x, scores, 'bo-', linewidth=2, markersize=8)
-
- # Add labels for each point
- for i, (score, pattern) in enumerate(zip(scores, patterns)):
- plt.annotate(
- f'{pattern}\n{score:.0f}%',
- (i, score),
- textcoords="offset points",
- xytext=(0, 10),
- ha='center',
- bbox=dict(
- boxstyle='round,pad=0.5',
- fc='white',
- ec='gray',
- alpha=0.8
 )
- )
-
- # Customize the plot
- plt.ylim(-5, 105)
- plt.grid(True, linestyle='--', alpha=0.7)
- plt.title('Abuse Pattern Timeline', pad=20, fontsize=12)
- plt.ylabel('Abuse Score %')
-
- # X-axis labels
- plt.xticks(x, dates, rotation=45)
-
- # Risk level bands with better colors
- plt.axhspan(0, 50, color='#90EE90', alpha=0.2) # light green
- plt.axhspan(50, 70, color='#FFD700', alpha=0.2) # gold
- plt.axhspan(70, 85, color='#FFA500', alpha=0.2) # orange
- plt.axhspan(85, 100, color='#FF6B6B', alpha=0.2) # light red
-
- # Add risk level labels
- plt.text(-0.2, 25, 'Low Risk', rotation=90, va='center')
- plt.text(-0.2, 60, 'Moderate Risk', rotation=90, va='center')
- plt.text(-0.2, 77.5, 'High Risk', rotation=90, va='center')
- plt.text(-0.2, 92.5, 'Critical Risk', rotation=90, va='center')
-
- # Adjust layout
- plt.tight_layout()
-
- # Convert plot to image
- buf = io.BytesIO()
- plt.savefig(buf, format='png', bbox_inches='tight')
- buf.seek(0)
- plt.close('all') # Close all figures to prevent memory leaks
-
- return Image.open(buf)
-
 
 def analyze_composite(msg1, msg2, msg3, *answers_and_none):
- from collections import Counter
-
- none_selected_checked = answers_and_none[-1]
- responses_checked = any(answers_and_none[:-1])
- none_selected = not responses_checked and none_selected_checked
-
- if none_selected:
- escalation_score = 0
- escalation_note = "Checklist completed: no danger items reported."
- escalation_completed = True
- elif responses_checked:
- escalation_score = sum(w for (_, w), a in zip(ESCALATION_QUESTIONS, answers_and_none[:-1]) if a)
- escalation_note = "Checklist completed."
- escalation_completed = True
- else:
- escalation_score = None
- escalation_note = "Checklist not completed."
- escalation_completed = False
-
- messages = [msg1, msg2, msg3]
- active = [(m, f"Message {i+1}") for i, m in enumerate(messages) if m.strip()]
- if not active:
- return "Please enter at least one message.", None

- # Flag any threat phrases present in the messages
- import re
-
- def normalize(text):
- import unicodedata
- text = text.lower().strip()
- text = unicodedata.normalize("NFKD", text) # handles curly quotes
- text = text.replace("’", "'") # smart to straight
- return re.sub(r"[^a-z0-9 ]", "", text)
-
- def detect_threat_motifs(message, motif_list):
- norm_msg = normalize(message)
- return [
- motif for motif in motif_list
- if normalize(motif) in norm_msg
- ]
-
- # Collect matches per message
- immediate_threats = [detect_threat_motifs(m, THREAT_MOTIFS) for m, _ in active]
- flat_threats = [t for sublist in immediate_threats for t in sublist]
- threat_risk = "Yes" if flat_threats else "No"
- results = [(analyze_single_message(m, THRESHOLDS.copy()), d) for m, d in active]
-
- abuse_scores = [r[0][0] for r in results]
- stages = [r[0][4] for r in results]
- darvo_scores = [r[0][5] for r in results]
- tone_tags = [r[0][6] for r in results]
- dates_used = [r[1] for r in results]
-
- predicted_labels = [label for r in results for label in r[0][1]] # Use threshold_labels instead
- high = {'control'}
- moderate = {'gaslighting', 'dismissiveness', 'obscure language', 'insults', 'contradictory statements', 'guilt tripping'}
- low = {'blame shifting', 'projection', 'recovery phase'}
- counts = {'high': 0, 'moderate': 0, 'low': 0}
- for label in predicted_labels:
- if label in high:
- counts['high'] += 1
- elif label in moderate:
- counts['moderate'] += 1
- elif label in low:
- counts['low'] += 1
-
- # Pattern escalation logic
- pattern_escalation_risk = "Low"
- if counts['high'] >= 2 and counts['moderate'] >= 2:
- pattern_escalation_risk = "Critical"
- elif (counts['high'] >= 2 and counts['moderate'] >= 1) or (counts['moderate'] >= 3) or (counts['high'] >= 1 and counts['moderate'] >= 2):
- pattern_escalation_risk = "High"
- elif (counts['moderate'] == 2) or (counts['high'] == 1 and counts['moderate'] == 1) or (counts['moderate'] == 1 and counts['low'] >= 2) or (counts['high'] == 1 and sum(counts.values()) == 1):
- pattern_escalation_risk = "Moderate"
-
- checklist_escalation_risk = "Unknown" if escalation_score is None else (
- "Critical" if escalation_score >= 20 else
- "Moderate" if escalation_score >= 10 else
- "Low"
- )
-
- escalation_bump = 0
- for result, _ in results:
- abuse_score, _, _, sentiment, stage, darvo_score, tone_tag = result
- if darvo_score > 0.65:
- escalation_bump += 3
- if tone_tag in ["forced accountability flip", "emotional threat"]:
- escalation_bump += 2
- if abuse_score > 80:
- escalation_bump += 2
- if stage == 2:
- escalation_bump += 3
-
- def rank(label):
- return {"Low": 0, "Moderate": 1, "High": 2, "Critical": 3, "Unknown": 0}.get(label, 0)
-
- combined_score = rank(pattern_escalation_risk) + rank(checklist_escalation_risk) + escalation_bump
- escalation_risk = (
- "Critical" if combined_score >= 6 else
- "High" if combined_score >= 4 else
- "Moderate" if combined_score >= 2 else
- "Low"
- )

- none_selected_checked = answers_and_none[-1]
- responses_checked = any(answers_and_none[:-1])
- none_selected = not responses_checked and none_selected_checked
-
- # Determine escalation_score
- if none_selected:
- escalation_score = 0
- escalation_completed = True
- elif responses_checked:
- escalation_score = sum(
- w for (_, w), a in zip(ESCALATION_QUESTIONS, answers_and_none[:-1]) if a
- )
- escalation_completed = True
- else:
- escalation_score = None
- escalation_completed = False
-
- # Build escalation_text and hybrid_score
- if escalation_score is None:
- escalation_text = (
- "🚫 **Escalation Potential: Unknown** (Checklist not completed)\n"
- "⚠️ This section was not completed. Escalation potential is estimated using message data only.\n"
- )
- hybrid_score = 0
- elif escalation_score == 0:
- escalation_text = (
- "✅ **Escalation Checklist Completed:** No danger items reported.\n"
- "🧭 **Escalation potential estimated from detected message patterns only.**\n"
- f"• Pattern Risk: {pattern_escalation_risk}\n"
- f"• Checklist Risk: None reported\n"
- f"• Escalation Bump: +{escalation_bump} (from DARVO, tone, intensity, etc.)"
- )
- hybrid_score = escalation_bump
- else:
- hybrid_score = escalation_score + escalation_bump
- escalation_text = (
- f"📈 **Escalation Potential: {escalation_risk} ({hybrid_score}/29)**\n"
- "📋 This score combines your safety checklist answers *and* detected high-risk behavior.\n"
- f"• Pattern Risk: {pattern_escalation_risk}\n"
- f"• Checklist Risk: {checklist_escalation_risk}\n"
- f"• Escalation Bump: +{escalation_bump} (from DARVO, tone, intensity, etc.)"
 )
- # Composite Abuse Score
- composite_abuse_scores = []
- for result, _ in results:
- abuse_score, _, matched_scores, sentiment, _, _, _ = result
- composite_abuse_scores.append(abuse_score) # Just use the already calculated abuse score
- composite_abuse = int(round(sum(composite_abuse_scores) / len(composite_abuse_scores)))
-
- most_common_stage = max(set(stages), key=stages.count)
- stage_text = RISK_STAGE_LABELS[most_common_stage]
-
- # Derive top label list for each message
- top_labels = []
- for result, _ in results:
- threshold_labels = result[1] # Get threshold_labels from result
- if threshold_labels: # If we have threshold labels
- top_labels.append(threshold_labels[0]) # Add the first one
- else:
- top_labels.append("none") # Default if no labels
-
- avg_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
- darvo_blurb = ""
- if avg_darvo > 0.25:
- level = "moderate" if avg_darvo < 0.65 else "high"
- darvo_blurb = f"\n\n🎭 **DARVO Score: {avg_darvo}** → This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
-
- out = f"Abuse Intensity: {composite_abuse}%\n"
- out += "📊 This reflects the strength and severity of detected abuse patterns in the message(s).\n\n"
- out += generate_risk_snippet(composite_abuse, top_labels[0], hybrid_score, most_common_stage)
- out += f"\n\n{stage_text}"
- out += darvo_blurb
- out += "\n\n🎭 **Emotional Tones Detected:**\n"
- for i, tone in enumerate(tone_tags):
- out += f"• Message {i+1}: *{tone or 'none'}*\n"
- # --- Add Immediate Danger Threats section
- if flat_threats:
- out += "\n\n🚨 **Immediate Danger Threats Detected:**\n"
- for t in set(flat_threats):
- out += f"• \"{t}\"\n"
- out += "\n⚠️ These phrases may indicate an imminent risk to physical safety."
- else:
- out += "\n\n🧩 **Immediate Danger Threats:** None explicitly detected.\n"
- out += "This does *not* rule out risk, but no direct threat phrases were matched."
- pattern_labels = [
- pats[0][0] if (pats := r[0][2]) else "none"
- for r in results
- ]
- timeline_image = generate_abuse_score_chart(dates_used, abuse_scores, pattern_labels)
- out += "\n\n" + escalation_text
- return out, timeline_image
-
- textbox_inputs = [gr.Textbox(label=f"Message {i+1}") for i in range(3)]
- quiz_boxes = [gr.Checkbox(label=q) for q, _ in ESCALATION_QUESTIONS]
- none_box = gr.Checkbox(label="None of the above")
-
-
- # ─── FINAL “FORCE LAUNCH” (no guards) ───────────
-
- demo = gr.Interface(
- fn=analyze_composite,
- inputs=textbox_inputs + quiz_boxes + [none_box],
- outputs=[
- gr.Textbox(label="Results"),
- gr.Image(label="Abuse Score Timeline", type="pil")
- ],
- title="Abuse Pattern Detector + Escalation Quiz",
- description=(
- "Enter up to three messages that concern you. "
- "For the most accurate results, include messages from a recent emotionally intense period."
- ),
- flagging_mode="manual"
- )
-
- # This single call will start the server and block,
- # keeping the container alive on Spaces.
- demo.launch()
-
- def generate_risk_snippet(abuse_score, patterns, hybrid_score, stage):
- """
- Enhanced risk assessment generator with more nuanced scoring and pattern analysis
-
- Parameters:
- - abuse_score: float (0-100)
- - patterns: list of detected abuse patterns
- - hybrid_score: float (combined escalation/risk score)
- - stage: int (1-4 representing relationship stages)
- """
-
- # Define risk thresholds with more granular levels
- def get_risk_level(abuse_score, hybrid_score, patterns):
- if abuse_score >= 85 or hybrid_score >= 20:
- return "Critical"
- elif abuse_score >= 70 or hybrid_score >= 15:
- return "High"
- elif abuse_score >= 50 or hybrid_score >= 10:
- return "Moderate"
- return "Low"
-
- # Pattern severity weights
- PATTERN_SEVERITY = {
- "control": 3,
- "gaslighting": 3,
- "insults": 2,
- "blame shifting": 2,
- "guilt tripping": 2,
- "dismissiveness": 1,
- "projection": 1,
- "contradictory statements": 1,
- "obscure language": 1,
- "recovery": 1
- }
-
- # Calculate weighted pattern severity
- pattern_severity = sum(PATTERN_SEVERITY.get(p, 0) for p in patterns)
-
- # Get base risk level
- risk_level = get_risk_level(abuse_score, hybrid_score, patterns)
-
- # Generate risk descriptions with more detailed context
- risk_descriptions = {
- "Critical": (
- "🚨 **Risk Level: Critical**\n"
- f"Multiple severe abuse patterns detected (Score: {abuse_score:.1f}%). "
- "This situation shows signs of dangerous escalation and immediate intervention may be needed."
- ),
- "High": (
- "⚠️ **Risk Level: High**\n"
- f"Strong abuse patterns detected (Score: {abuse_score:.1f}%). "
- "This situation shows concerning signs of manipulation and control."
- ),
- "Moderate": (
- "⚡ **Risk Level: Moderate**\n"
- f"Concerning patterns detected (Score: {abuse_score:.1f}%). "
- "While not severe, these behaviors indicate unhealthy relationship dynamics."
- ),
- "Low": (
- "📝 **Risk Level: Low**\n"
- f"Minor concerning patterns detected (Score: {abuse_score:.1f}%). "
- "While present, the detected behaviors are subtle or infrequent."
- )
- }
-
- # Add stage-specific context
- stage_context = {
- 1: "Current patterns suggest a tension-building phase.",
- 2: "Messages show signs of active escalation.",
- 3: "Patterns indicate attempted reconciliation without real change.",
- 4: "Surface calm may mask underlying issues."
- }
-
- # Build output
- output = risk_descriptions[risk_level]
- if stage in stage_context:
- output += f"\n{stage_context[stage]}"
-
- # Add pattern analysis if patterns detected
- if patterns:
- output += "\n\n🔍 **Detected Patterns:**"
- for pattern in patterns:
- severity = PATTERN_SEVERITY.get(pattern, 0)
- output += f"\n• {pattern.title()} (Severity: {'❗' * severity})"
-
- # Add safety recommendations based on risk level
- if risk_level in ["Critical", "High"]:
- output += "\n\n⚠️ **Safety Recommendations:**"
- output += "\n• Consider reaching out to a domestic violence hotline"
- output += "\n• Document all concerning interactions"
- output += "\n• Have a safety plan in place"
-
- return output
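
The multiplier chain inside compute_abuse_score (deleted above, re-added below inside a try/except) is easiest to sanity-check with concrete numbers. A minimal worked sketch, assuming two detected labels that both fall back to the 1.0 default of PATTERN_WEIGHTS.get(label, 1.0) (PATTERN_WEIGHTS itself is defined in context not shown in this diff) and a "supportive" sentiment:

matched_scores = [("control", 0.72, 1.0), ("gaslighting", 0.55, 1.0)]

# Weighted average of pattern scores, scaled to a percentage.
weighted_sum = sum(score * weight for _, score, weight in matched_scores)  # 1.27
total_weight = sum(weight for _, _, weight in matched_scores)              # 2.0
base_score = (weighted_sum / total_weight) * 100                           # 63.5

# Only two patterns, so the >= 3 pattern-combination multiplier (x1.2) is skipped.
base_score *= 1.15  # high-severity pattern present (control, gaslighting)      -> 73.025
base_score *= 1.1   # at least one pattern score above 0.6                      -> 80.3275
base_score *= 1.15  # two pattern scores above 0.5                              -> 92.377
base_score *= 0.9   # "supportive" sentiment, softened because severity is high -> 83.139

base_score = max(base_score, 65.0)        # strong-pattern floor (no effect here)
print(min(round(base_score, 1), 100.0))   # 83.1

The same chain explains why undermining messages with several mid-strength labels can saturate at the 100.0 cap.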
+ import os
+ os.environ['GRADIO_SERVER_NAME'] = "0.0.0.0"
+ os.environ['GRADIO_SERVER_PORT'] = "7860"
+
 import gradio as gr
 import torch
 import numpy as np
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import re
 import matplotlib.pyplot as plt
 import io
 from PIL import Image
 from datetime import datetime
 from torch.nn.functional import sigmoid
 from collections import Counter
+ import logging
+
+ # Set up logging
+ logging.basicConfig(level=logging.DEBUG)
+ logger = logging.getLogger(__name__)
+
+ # Device configuration
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+ logger.info(f"Using device: {device}")
+
+ # Model initialization with error handling
+ def load_model_and_tokenizer(model_name, model_type="main"):
+ try:
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
+ tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
+ model = model.to(device)
+ logger.info(f"Successfully loaded {model_type} model: {model_name}")
+ return model, tokenizer
+ except Exception as e:
+ logger.error(f"Error loading {model_type} model {model_name}: {e}")
+ return None, None
+
+ # Load all models
+ try:
+ # Main abuse model
+ model_name = "SamanthaStorm/tether-multilabel-v4"
+ model, tokenizer = load_model_and_tokenizer(model_name, "main")
+
+ # Tone model
+ tone_model, tone_tokenizer = load_model_and_tokenizer(
+ "SamanthaStorm/tone-tag-multilabel-v1", "tone"
+ )
+
+ # Sentiment model
+ sentiment_model, sentiment_tokenizer = load_model_and_tokenizer(
+ "SamanthaStorm/tether-sentiment", "sentiment"
+ )
+
+ # DARVO model
+ darvo_model, darvo_tokenizer = load_model_and_tokenizer(
+ "SamanthaStorm/tether-darvo-regressor-v1", "darvo"
+ )
+
+ if darvo_model:
+ darvo_model.eval()
+
+ except Exception as e:
+ logger.error(f"Error during model initialization: {e}")
+ raise
+
+ # Constants and Labels
 LABELS = [
+ "recovery", "control", "gaslighting", "guilt tripping", "dismissiveness",
+ "blame shifting", "nonabusive", "projection", "insults",
+ "contradictory statements", "obscure language"
+ ]
+
+ TONE_LABELS = [
+ "cold invalidation", "coercive warmth", "contradictory gaslight",
+ "deflective hostility", "emotional instability", "nonabusive",
+ "performative regret", "emotional threat", "forced accountability flip"
 ]

+ SENTIMENT_LABELS = ["undermining", "supportive"]
+
 THRESHOLDS = {
 "recovery": 0.4,
 "control": 0.45,

 ("Partner monitors your calls/GPS/social media", 2)
 ]

 RISK_STAGE_LABELS = {
+ 1: "🌀 Risk Stage: Tension-Building\nThis message reflects rising emotional pressure or subtle control attempts.",
+ 2: "🔥 Risk Stage: Escalation\nThis message includes direct or aggressive patterns, suggesting active harm.",
+ 3: "🌧️ Risk Stage: Reconciliation\nThis message reflects a reset attempt—apologies or emotional repair without accountability.",
+ 4: "🌸 Risk Stage: Calm / Honeymoon\nThis message appears supportive but may follow prior harm, minimizing it."
 }

+ # Threat Motifs
  THREAT_MOTIFS = [
+ "i'll kill you", "i'm going to hurt you", "you're dead", "you won't survive this",
+ "i'll break your face", "i'll bash your head in", "i'll snap your neck",
+ "i'll come over there and make you shut up", "i'll knock your teeth out",
+ "you're going to bleed", "you want me to hit you?", "i won't hold back next time",
+ "i swear to god i'll beat you", "next time, i won't miss", "i'll make you scream",
+ "i know where you live", "i'm outside", "i'll be waiting", "i saw you with him",
+ "you can't hide from me", "i'm coming to get you", "i'll find you", "i know your schedule",
+ "i watched you leave", "i followed you home", "you'll regret this", "you'll be sorry",
+ "you're going to wish you hadn't", "you brought this on yourself", "don't push me",
+ "you have no idea what i'm capable of", "you better watch yourself",
+ "i don't care what happens to you anymore", "i'll make you suffer", "you'll pay for this",
+ "i'll never let you go", "you're nothing without me", "if you leave me, i'll kill myself",
+ "i'll ruin you", "i'll tell everyone what you did", "i'll make sure everyone knows",
+ "i'm going to destroy your name", "you'll lose everyone", "i'll expose you",
+ "your friends will hate you", "i'll post everything", "you'll be cancelled",
+ "you'll lose everything", "i'll take the house", "i'll drain your account",
+ "you'll never see a dime", "you'll be broke when i'm done", "i'll make sure you lose your job",
+ "i'll take your kids", "i'll make sure you have nothing", "you can't afford to leave me",
+ "don't make me do this", "you know what happens when i'm mad", "you're forcing my hand",
+ "if you just behaved, this wouldn't happen", "this is your fault",
+ "you're making me hurt you", "i warned you", "you should have listened"
 ]

 def predict_darvo_score(text):
+ """Predict DARVO score for given text"""
+ try:
+ inputs = darvo_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+ inputs = {k: v.to(device) for k, v in inputs.items()}
+ with torch.no_grad():
+ logits = darvo_model(**inputs).logits
+ return round(sigmoid(logits.cpu()).item(), 4)
+ except Exception as e:
+ logger.error(f"Error in DARVO prediction: {e}")
+ return 0.0

 def detect_weapon_language(text):
+ """Detect weapon-related language in text"""
+ weapon_keywords = ["knife", "gun", "bomb", "weapon", "kill", "stab"]
 t = text.lower()
 return any(w in t for w in weapon_keywords)

  def get_risk_stage(patterns, sentiment):
+ """Determine risk stage based on patterns and sentiment"""
+ try:
+ if "insults" in patterns:
+ return 2
+ elif "recovery" in patterns:
+ return 3
+ elif "control" in patterns or "guilt tripping" in patterns:
+ return 1
+ elif sentiment == "supportive" and any(p in patterns for p in ["projection", "dismissiveness"]):
+ return 4
+ return 1
+ except Exception as e:
+ logger.error(f"Error determining risk stage: {e}")
 return 1

  def get_emotional_tone_tag(text, emotions, sentiment, patterns, abuse_score):
+ """Get emotional tone tag for text"""
+ try:
+ inputs = tone_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+ inputs = {k: v.to(device) for k, v in inputs.items()}
+ with torch.no_grad():
+ logits = tone_model(**inputs).logits[0]
+ probs = torch.sigmoid(logits).cpu().numpy()
+ scores = dict(zip(TONE_LABELS, np.round(probs, 3)))
+ return max(scores, key=scores.get)
+ except Exception as e:
+ logger.error(f"Error in emotional tone analysis: {e}")
+ return "unknown"

  def compute_abuse_score(matched_scores, sentiment):
+ """Compute abuse score from matched patterns and sentiment"""
+ try:
+ if not matched_scores:
+ return 0.0
+
+ total_weight = sum(weight for _, _, weight in matched_scores)
+ if total_weight == 0:
+ return 0.0
+
+ pattern_scores = [(label, score) for label, score, _ in matched_scores]
+ sorted_scores = sorted(pattern_scores, key=lambda x: x[1], reverse=True)
+
+ weighted_sum = sum(score * weight for _, score, weight in matched_scores)
+ base_score = (weighted_sum / total_weight) * 100
+
+ # Apply multipliers
+ if len(matched_scores) >= 3:
+ base_score *= 1.2
+
+ high_severity_patterns = {'gaslighting', 'control', 'blame shifting'}
 if any(label in high_severity_patterns for label, _, _ in matched_scores):
+ base_score *= 1.15
+
+ if any(score > 0.6 for _, score, _ in matched_scores):
+ base_score *= 1.1
+
+ high_scores = len([score for _, score, _ in matched_scores if score > 0.5])
+ if high_scores >= 2:
+ base_score *= 1.15
+
+ # Apply sentiment modifiers
+ if sentiment == "supportive":
+ if any(label in high_severity_patterns for label, _, _ in matched_scores):
+ base_score *= 0.9
+ else:
+ base_score *= 0.85
+ elif sentiment == "undermining":
+ base_score *= 1.15
+
+ if any(score > 0.6 for _, score, _ in matched_scores):
+ base_score = max(base_score, 65.0)
+
+ return min(round(base_score, 1), 100.0)
+ except Exception as e:
+ logger.error(f"Error computing abuse score: {e}")
+ return 0.0

  def analyze_single_message(text, thresholds):
+ """Analyze a single message for abuse patterns"""
+ logger.debug("\n=== DEBUG START ===")
+ logger.debug(f"Input text: {text}")
+
+ try:
+ if not text.strip():
+ logger.debug("Empty text, returning zeros")
+ return 0.0, [], [], {"label": "none"}, 1, 0.0, None
+
+ # Check for explicit abuse
+ explicit_abuse_words = ['fuck', 'bitch', 'shit', 'ass', 'dick']
+ explicit_abuse = any(word in text.lower() for word in explicit_abuse_words)
+ logger.debug(f"Explicit abuse detected: {explicit_abuse}")
+
+ # Abuse model inference
+ inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+ inputs = {k: v.to(device) for k, v in inputs.items()}
+
+ with torch.no_grad():
+ outputs = model(**inputs)
+ raw_scores = torch.sigmoid(outputs.logits.squeeze(0)).cpu().numpy()
+
+ # Log raw model outputs
+ logger.debug("\nRaw model scores:")
+ for label, score in zip(LABELS, raw_scores):
+ logger.debug(f"{label}: {score:.3f}")
+
+ # Get predictions and sort them
+ predictions = list(zip(LABELS, raw_scores))
+ sorted_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)
+ logger.debug("\nTop 3 predictions:")
+ for label, score in sorted_predictions[:3]:
+ logger.debug(f"{label}: {score:.3f}")
+
+ # Apply thresholds
+ threshold_labels = []
 if explicit_abuse:
+ threshold_labels.append("insults")
+ logger.debug("\nForced inclusion of 'insults' due to explicit abuse")
+
+ for label, score in sorted_predictions:
+ base_threshold = thresholds.get(label, 0.25)
+ if explicit_abuse:
+ base_threshold *= 0.5
+ if score > base_threshold:
+ if label not in threshold_labels: # Avoid duplicates
+ threshold_labels.append(label)
+
+ logger.debug(f"\nLabels that passed thresholds: {threshold_labels}")
+
+ # Calculate matched scores
+ matched_scores = []
+ for label in threshold_labels:
+ score = raw_scores[LABELS.index(label)]
+ weight = PATTERN_WEIGHTS.get(label, 1.0)
+ if explicit_abuse and label == "insults":
+ weight *= 1.5
+ matched_scores.append((label, score, weight))
+
+ logger.debug(f"\nMatched scores (label, score, weight): {matched_scores}")
+
+ # Get sentiment
+ sent_inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+ sent_inputs = {k: v.to(device) for k, v in sent_inputs.items()}
+ with torch.no_grad():
+ sent_logits = sentiment_model(**sent_inputs).logits[0]
+ sent_probs = torch.softmax(sent_logits, dim=-1).cpu().numpy()
+ sentiment = SENTIMENT_LABELS[int(np.argmax(sent_probs))]
+ logger.debug(f"\nDetected sentiment: {sentiment}")
+
+ # Get tone
+ tone_inputs = tone_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+ tone_inputs = {k: v.to(device) for k, v in tone_inputs.items()}
+ with torch.no_grad():
+ tone_logits = tone_model(**tone_inputs).logits[0]
+ tone_probs = torch.sigmoid(tone_logits).cpu().numpy()
+ tone_tag = TONE_LABELS[int(np.argmax(tone_probs))]
+ logger.debug(f"Detected tone: {tone_tag}")
+
+ # Get DARVO score
+ darvo_score = predict_darvo_score(text)
+ logger.debug(f"DARVO score: {darvo_score}")
+
+ # Calculate abuse score
+ if not matched_scores:
+ logger.debug("No matched scores, returning 0")
+ return 0.0, [], [], {"label": "undermining"}, 2 if explicit_abuse else 1, 0.0, None
+
+ abuse_score = compute_abuse_score(matched_scores, sentiment)
+
+ if explicit_abuse:
+ abuse_score = max(abuse_score, 70.0)
+
+ logger.debug(f"\nCalculated abuse score: {abuse_score}")
+
+ # Set stage
+ stage = 2 if explicit_abuse or abuse_score > 70 else 1
+ logger.debug(f"Final stage: {stage}")
+
+ logger.debug("=== DEBUG END ===\n")
+
+ return abuse_score, threshold_labels, matched_scores, {"label": sentiment}, stage, darvo_score, tone_tag
+
+ except Exception as e:
+ logger.error(f"Error in analyze_single_message: {e}")
+ return 0.0, [], [], {"label": "error"}, 1, 0.0, None

  def generate_abuse_score_chart(dates, scores, patterns):
+ """Generate a timeline chart of abuse scores"""
+ try:
+ plt.figure(figsize=(10, 6))
+ plt.clf()
+
+ # Create new figure
+ fig, ax = plt.subplots(figsize=(10, 6))
+
+ # Plot points and lines
+ x = range(len(scores))
+ plt.plot(x, scores, 'bo-', linewidth=2, markersize=8)
+
+ # Add labels for each point
+ for i, (score, pattern) in enumerate(zip(scores, patterns)):
+ plt.annotate(
+ f'{pattern}\n{score:.0f}%',
+ (i, score),
+ textcoords="offset points",
+ xytext=(0, 10),
+ ha='center',
+ bbox=dict(
+ boxstyle='round,pad=0.5',
+ fc='white',
+ ec='gray',
+ alpha=0.8
+ )
 )
+
+ # Customize the plot
+ plt.ylim(-5, 105)
+ plt.grid(True, linestyle='--', alpha=0.7)
+ plt.title('Abuse Pattern Timeline', pad=20, fontsize=12)
+ plt.ylabel('Abuse Score %')
+
+ # X-axis labels
+ plt.xticks(x, dates, rotation=45)
+
+ # Risk level bands
+ plt.axhspan(0, 50, color='#90EE90', alpha=0.2) # light green
+ plt.axhspan(50, 70, color='#FFD700', alpha=0.2) # gold
+ plt.axhspan(70, 85, color='#FFA500', alpha=0.2) # orange
+ plt.axhspan(85, 100, color='#FF6B6B', alpha=0.2) # light red
+
+ # Add risk level labels
+ plt.text(-0.2, 25, 'Low Risk', rotation=90, va='center')
+ plt.text(-0.2, 60, 'Moderate Risk', rotation=90, va='center')
+ plt.text(-0.2, 77.5, 'High Risk', rotation=90, va='center')
+ plt.text(-0.2, 92.5, 'Critical Risk', rotation=90, va='center')
+
+ # Adjust layout
+ plt.tight_layout()
+
+ # Convert plot to image
+ buf = io.BytesIO()
+ plt.savefig(buf, format='png', bbox_inches='tight')
+ buf.seek(0)
+ plt.close('all') # Close all figures to prevent memory leaks
+
+ return Image.open(buf)
+ except Exception as e:
+ logger.error(f"Error generating abuse score chart: {e}")
+ return None

  def analyze_composite(msg1, msg2, msg3, *answers_and_none):
+ """Analyze multiple messages and checklist responses"""
+ try:
+ # Process checklist responses
+ none_selected_checked = answers_and_none[-1]
+ responses_checked = any(answers_and_none[:-1])
+ none_selected = not responses_checked and none_selected_checked
+
+ # Determine escalation score
+ if none_selected:
+ escalation_score = 0
+ escalation_note = "Checklist completed: no danger items reported."
+ escalation_completed = True
+ elif responses_checked:
+ escalation_score = sum(w for (_, w), a in zip(ESCALATION_QUESTIONS, answers_and_none[:-1]) if a)
+ escalation_note = "Checklist completed."
+ escalation_completed = True
+ else:
+ escalation_score = None
+ escalation_note = "Checklist not completed."
+ escalation_completed = False
+
+ # Process messages
+ messages = [msg1, msg2, msg3]
+ active = [(m, f"Message {i+1}") for i, m in enumerate(messages) if m.strip()]
+ if not active:
+ return "Please enter at least one message.", None
+
+ # Detect threats
+ def normalize(text):
+ import unicodedata
+ text = text.lower().strip()
+ text = unicodedata.normalize("NFKD", text)
+ text = text.replace("’", "'") # smart quote to straight
+ return re.sub(r"[^a-z0-9 ]", "", text)
+
+ def detect_threat_motifs(message, motif_list):
+ norm_msg = normalize(message)
+ return [motif for motif in motif_list if normalize(motif) in norm_msg]
+
+ # Analyze threats and patterns
+ immediate_threats = [detect_threat_motifs(m, THREAT_MOTIFS) for m, _ in active]
+ flat_threats = [t for sublist in immediate_threats for t in sublist]
+ threat_risk = "Yes" if flat_threats else "No"
+
+ # Analyze each message
+ results = [(analyze_single_message(m, THRESHOLDS.copy()), d) for m, d in active]
+
+ # Extract scores and metadata
+ abuse_scores = [r[0][0] for r in results]
+ stages = [r[0][4] for r in results]
+ darvo_scores = [r[0][5] for r in results]
+ tone_tags = [r[0][6] for r in results]
+ dates_used = [r[1] for r in results]
+
+ # Analyze patterns
+ predicted_labels = [label for r in results for label in r[0][1]]
+ high = {'control'}
+ moderate = {'gaslighting', 'dismissiveness', 'obscure language', 'insults',
+ 'contradictory statements', 'guilt tripping'}
+ low = {'blame shifting', 'projection', 'recovery phase'}
+
+ counts = {'high': 0, 'moderate': 0, 'low': 0}
+ for label in predicted_labels:
+ if label in high:
+ counts['high'] += 1
+ elif label in moderate:
+ counts['moderate'] += 1
+ elif label in low:
+ counts['low'] += 1
+
+ # Determine pattern escalation risk
+ if counts['high'] >= 2 and counts['moderate'] >= 2:
+ pattern_escalation_risk = "Critical"
+ elif (counts['high'] >= 2 and counts['moderate'] >= 1) or \
+ (counts['moderate'] >= 3) or \
+ (counts['high'] >= 1 and counts['moderate'] >= 2):
+ pattern_escalation_risk = "High"
+ elif (counts['moderate'] == 2) or \
+ (counts['high'] == 1 and counts['moderate'] == 1) or \
+ (counts['moderate'] == 1 and counts['low'] >= 2) or \
+ (counts['high'] == 1 and sum(counts.values()) == 1):
+ pattern_escalation_risk = "Moderate"
+ else:
+ pattern_escalation_risk = "Low"
+
+ # Calculate escalation risk
+ checklist_escalation_risk = "Unknown" if escalation_score is None else (
+ "Critical" if escalation_score >= 20 else
+ "Moderate" if escalation_score >= 10 else
+ "Low"
 )
+
+ # Calculate escalation bump
+ escalation_bump = 0
+ for result, _ in results:
+ abuse_score, _, _, sentiment, stage, darvo_score, tone_tag = result
+ if darvo_score > 0.65:
+ escalation_bump += 3
+ if tone_tag in ["forced accountability flip", "emotional threat"]:
+ escalation_bump += 2
+ if abuse_score > 80:
+ escalation_bump += 2
+ if stage == 2:
+ escalation_bump += 3
+
+ # Calculate combined risk
+ def rank(label):
+ return {"Low": 0, "Moderate": 1, "High": 2, "Critical": 3, "Unknown": 0}.get(label, 0)
+
+ combined_score = rank(pattern_escalation_risk) + rank(checklist_escalation_risk) + escalation_bump
+ escalation_risk = (
+ "Critical" if combined_score >= 6 else
+ "High" if combined_score >= 4 else
+ "Moderate" if combined_score >= 2 else
+ "Low"
 )
+
+ # Build escalation text
+ if escalation_score is None:
+ escalation_text = (
+ "🚫 **Escalation Potential: Unknown** (Checklist not completed)\n"
+ "⚠️ This section was not completed. Escalation potential is estimated using message data only.\n"
+ )
+ hybrid_score = 0
+ elif escalation_score == 0:
+ escalation_text = (
+ "✅ **Escalation Checklist Completed:** No danger items reported.\n"
+ "🧭 **Escalation potential estimated from detected message patterns only.**\n"
+ f"• Pattern Risk: {pattern_escalation_risk}\n"
+ f"• Checklist Risk: None reported\n"
+ f"• Escalation Bump: +{escalation_bump} (from DARVO, tone, intensity, etc.)"
+ )
+ hybrid_score = escalation_bump
+ else:
+ hybrid_score = escalation_score + escalation_bump
+ escalation_text = (
+ f"📈 **Escalation Potential: {escalation_risk} ({hybrid_score}/29)**\n"
+ "📋 This score combines your safety checklist answers *and* detected high-risk behavior.\n"
+ f"• Pattern Risk: {pattern_escalation_risk}\n"
+ f"• Checklist Risk: {checklist_escalation_risk}\n"
+ f"• Escalation Bump: +{escalation_bump} (from DARVO, tone, intensity, etc.)"
+ )
+
+ # Calculate composite abuse score
+ composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
+
+ # Get most common stage
+ most_common_stage = max(set(stages), key=stages.count)
+ stage_text = RISK_STAGE_LABELS[most_common_stage]
+
+ # Get top labels
+ top_labels = []
+ for result, _ in results:
+ threshold_labels = result[1]
+ if threshold_labels:
+ top_labels.append(threshold_labels[0])
+ else:
+ top_labels.append("none")
+
+ # Calculate average DARVO score
+ avg_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
+ darvo_blurb = ""
+ if avg_darvo > 0.25:
+ level = "moderate" if avg_darvo < 0.65 else "high"
+ darvo_blurb = f"\n\n🎭 **DARVO Score: {avg_darvo}** → This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
+
+ # Build output text
+ out = f"Abuse Intensity: {composite_abuse}%\n"
+ out += "📊 This reflects the strength and severity of detected abuse patterns in the message(s).\n\n"
+ out += generate_risk_snippet(composite_abuse, top_labels[0], hybrid_score, most_common_stage)
+ out += f"\n\n{stage_text}"
+ out += darvo_blurb
+ out += "\n\n🎭 **Emotional Tones Detected:**\n"
+ for i, tone in enumerate(tone_tags):
+ out += f"• Message {i+1}: *{tone or 'none'}*\n"
+
+ # Add threat section
+ if flat_threats:
+ out += "\n\n🚨 **Immediate Danger Threats Detected:**\n"
+ for t in set(flat_threats):
+ out += f"• \"{t}\"\n"
+ out += "\n⚠️ These phrases may indicate an imminent risk to physical safety."
+ else:
+ out += "\n\n🧩 **Immediate Danger Threats:** None explicitly detected.\n"
+ out += "This does *not* rule out risk, but no direct threat phrases were matched."
+
+ # Generate timeline
+ pattern_labels = [
+ pats[0][0] if (pats := r[0][2]) else "none"
+ for r in results
+ ]
+ timeline_image = generate_abuse_score_chart(dates_used, abuse_scores, pattern_labels)
+
+ # Add escalation text
+ out += "\n\n" + escalation_text
+ return out, timeline_image
+
+ except Exception as e:
+ logger.error(f"Error in analyze_composite: {e}")
+ return "An error occurred during analysis.", None
+
+ # Gradio Interface Setup
+ def create_interface():
+ try:
+ textbox_inputs = [gr.Textbox(label=f"Message {i+1}") for i in range(3)]
+ quiz_boxes = [gr.Checkbox(label=q) for q, _ in ESCALATION_QUESTIONS]
+ none_box = gr.Checkbox(label="None of the above")
+
+ demo = gr.Interface(
+ fn=analyze_composite,
+ inputs=textbox_inputs + quiz_boxes + [none_box],
+ outputs=[
+ gr.Textbox(label="Results"),
+ gr.Image(label="Abuse Score Timeline", type="pil")
+ ],
+ title="Abuse Pattern Detector + Escalation Quiz",
+ description=(
+ "Enter up to three messages that concern you. "
+ "For the most accurate results, include messages from a recent emotionally intense period."
+ ),
+ flagging_mode="manual"
+ )
+ return demo
+ except Exception as e:
+ logger.error(f"Error creating interface: {e}")
+ raise
+
+ # Main execution
+ if __name__ == "__main__":
+ try:
+ demo = create_interface()
+ demo.launch(share=True)
+ except Exception as e:
+ logger.error(f"Failed to launch app: {e}")
+ raise
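
One gap worth noting: the new analyze_composite still calls generate_risk_snippet, but both copies of that function are deleted above and the new listing never re-adds it, so the call would raise a NameError unless the definition survives in context this diff view elides. A minimal sketch of what would need to exist, reconstructed from the deleted implementation (the per-pattern severity listing is dropped here, since the caller now passes a single label string rather than a list):

def generate_risk_snippet(abuse_score, patterns, hybrid_score, stage):
    """Risk summary text; sketch reconstructed from the deleted version."""
    # Risk level from abuse score and combined escalation score.
    if abuse_score >= 85 or hybrid_score >= 20:
        risk_level = "Critical"
    elif abuse_score >= 70 or hybrid_score >= 15:
        risk_level = "High"
    elif abuse_score >= 50 or hybrid_score >= 10:
        risk_level = "Moderate"
    else:
        risk_level = "Low"

    descriptions = {
        "Critical": f"🚨 **Risk Level: Critical**\nMultiple severe abuse patterns detected (Score: {abuse_score:.1f}%).",
        "High": f"⚠️ **Risk Level: High**\nStrong abuse patterns detected (Score: {abuse_score:.1f}%).",
        "Moderate": f"⚡ **Risk Level: Moderate**\nConcerning patterns detected (Score: {abuse_score:.1f}%).",
        "Low": f"📝 **Risk Level: Low**\nMinor concerning patterns detected (Score: {abuse_score:.1f}%).",
    }
    stage_context = {
        1: "Current patterns suggest a tension-building phase.",
        2: "Messages show signs of active escalation.",
        3: "Patterns indicate attempted reconciliation without real change.",
        4: "Surface calm may mask underlying issues.",
    }

    output = descriptions[risk_level]
    if stage in stage_context:
        output += f"\n{stage_context[stage]}"
    if risk_level in ["Critical", "High"]:
        output += "\n\n⚠️ **Safety Recommendations:**"
        output += "\n• Consider reaching out to a domestic violence hotline"
        output += "\n• Document all concerning interactions"
        output += "\n• Have a safety plan in place"
    return output

Restoring the fuller deleted version would also work, provided the caller passes a list of patterns instead of top_labels[0].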