import gradio as gr
import torch
from transformers import pipeline as hf_pipeline, AutoModelForSequenceClassification, AutoTokenizer
from PIL import Image
import io
import easyocr
import numpy as np
import pandas as pd

# ——— Load and preprocess NRC EmoLex ——————————————————————————————————
EMOLEX_PATH = "NRC-Emotion-Lexicon-Wordlevel-v0.92.txt"
emo_raw = pd.read_csv(
    EMOLEX_PATH,
    sep="\t",
    names=["word","emotion","flag"],
    comment="#",
    header=None
)
emo_df = (
    emo_raw
    .pivot(index="word", columns="emotion", values="flag")
    .fillna(0)
    .astype(int)
)
EMOLEX = emo_df.to_dict(orient="index")
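# EMOLEX maps each word to a 0/1 flag per lexicon emotion column.
# Illustrative shape (actual flags come from the lexicon file itself):
# EMOLEX["abandon"] -> {"anger": 0, "fear": 1, "negative": 1, "sadness": 1, ...}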

def score_emolex(text_lower):
    counts = {emo: 0 for emo in emo_df.columns}
    for tok in text_lower.split():
        if tok in EMOLEX:
            for emo, flag in EMOLEX[tok].items():
                counts[emo] += flag
    return counts
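
# Illustrative usage (actual counts depend on lexicon coverage):
# score_emolex("i feel so alone and afraid")
# -> {"fear": 1, "sadness": 1, "negative": 2, ..., "joy": 0}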

# ——— Load MPQA Subjectivity Lexicon —————————————————————————————————————————————
MPQA_PATH = "subjclueslen1-HLTEMNLP05.tff"
mpqa_lex = {}
with open(MPQA_PATH, encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line or line.startswith("#"):
            continue

        # build fields dict but skip any token without '='
        fields = {}
        for item in line.split():
            if "=" not in item:
                continue
            key, val = item.split("=", 1)
            fields[key] = val

        # must have word1
        if "word1" not in fields:
            continue
        w = fields.pop("word1").lower()
        mpqa_lex.setdefault(w, []).append(fields)
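
# Each .tff line follows the MPQA release format, e.g.:
#   type=weaksubj len=1 word1=abandoned pos1=adj stemmed1=n priorpolarity=negative
# so after parsing (word1 popped out as the dict key):
#   mpqa_lex["abandoned"] -> [{"type": "weaksubj", "len": "1", "pos1": "adj",
#                              "stemmed1": "n", "priorpolarity": "negative"}]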

# ——— 1) Emotion Pipeline ————————————————————————————————————————————————
emotion_pipeline = hf_pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=None,
    truncation=True
)
def get_emotion_profile(text):
    results = emotion_pipeline(text)
    if isinstance(results, list) and isinstance(results[0], list):
        results = results[0]
    return {r["label"].lower(): round(r["score"], 3) for r in results}

APOLOGY_KEYWORDS = ["sorry", "apolog", "forgive"]

# ——— 2) Abuse-Patterns Model ——————————————————————————————————————————————
model_name = "SamanthaStorm/tether-multilabel-v3"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

LABELS = [
    "blame shifting", "contradictory statements", "control", "dismissiveness",
    "gaslighting", "guilt tripping", "insults", "obscure language",
    "projection", "recovery phase", "threat"
]
THRESHOLDS = {
    "blame shifting": 0.28,
    "contradictory statements": 0.27,
    "control": 0.08,
    "dismissiveness": 0.32,
    "gaslighting": 0.27,
    "guilt tripping": 0.31,
    "insults": 0.10,
    "obscure language": 0.55,
    "projection": 0.09,
    "recovery phase": 0.33,
    "threat": 0.15
}
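
# A pattern counts as "active" when its sigmoid score meets or exceeds its
# per-label threshold (applied in analyze_message below).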

# ——— 3) Initialize EasyOCR reader ————————————————————————————————————————————
ocr_reader = easyocr.Reader(["en"], gpu=False)

# ——— 4) Emotional-Tone Tagging —————————————————————————————————————————————
def get_emotional_tone_tag(emotion_profile, patterns, text_lower):
    sadness  = emotion_profile.get("sadness",  0)
    joy      = emotion_profile.get("joy",      0)
    neutral  = emotion_profile.get("neutral",  0)
    disgust  = emotion_profile.get("disgust",  0)
    anger    = emotion_profile.get("anger",    0)
    fear     = emotion_profile.get("fear",     0)
    surprise = emotion_profile.get("surprise", 0)

    # NRC-EmoLex counts
    words = text_lower.split()
    lex_counts = {
        emo: sum(EMOLEX.get(w, {}).get(emo, 0) for w in words)
        # "surprise" included because rule 15 below reads lex_counts["surprise"]
        for emo in ["anger","joy","sadness","fear","disgust","surprise"]
    }

    # MPQA counts (skip values such as priorpolarity "neutral"/"both"
    # that have no bucket here and would otherwise raise KeyError)
    mpqa_counts = {"strongsubj":0,"weaksubj":0,"positive":0,"negative":0}
    for w in words:
        for entry in mpqa_lex.get(w, []):
            if entry.get("type") in mpqa_counts:
                mpqa_counts[entry["type"]] += 1
            if entry.get("priorpolarity") in mpqa_counts:
                mpqa_counts[entry["priorpolarity"]] += 1

    # 0. Support override
    if lex_counts["joy"] > 0 and any(k in text_lower for k in ["support","hope","grace"]):
        return "supportive"

    # 1. Performative Regret
    # → only when we see one of those patterns, not just lexicon hits
    if sadness > 0.4 \
       and any(p in patterns for p in ["blame shifting","guilt tripping","recovery phase"]):
        return "performative regret"

    # 2. Coercive Warmth
    if (joy > 0.3 or sadness > 0.4) \
       and (lex_counts["joy"] > 0 or lex_counts["sadness"] > 0) \
       and any(p in patterns for p in ["control","gaslighting"]):
        return "coercive warmth"

    # 3. Cold Invalidation
    if (neutral + disgust) > 0.5 \
       and lex_counts["disgust"] > 0 \
       and any(p in patterns for p in ["dismissiveness","projection","obscure language"]):
        return "cold invalidation"

    # 4. Genuine Vulnerability
    if (sadness + fear) > 0.5 \
       and lex_counts["sadness"] > 0 and lex_counts["fear"] > 0 \
       and all(p == "recovery phase" for p in patterns):
        return "genuine vulnerability"

    # 5. Emotional Threat
    if (anger + disgust) > 0.5 \
       and (lex_counts["anger"] > 0 or lex_counts["disgust"] > 0) \
       and any(p in patterns for p in ["control","threat","insults","dismissiveness"]):
        return "emotional threat"

    # 6. Weaponized Sadness
    if sadness > 0.6 \
       and lex_counts["sadness"] > 0 \
       and any(p in patterns for p in ["guilt tripping","projection"]):
        return "weaponized sadness"

    # 7. Toxic Resignation
    if neutral > 0.5 \
       and any(p in patterns for p in ["dismissiveness","obscure language"]) \
       and lex_counts["disgust"] == 0:
        return "toxic resignation"

    # 8. Indignant Reproach
    if anger > 0.5 \
       and lex_counts["anger"] > 0 \
       and any(p in patterns for p in ["guilt tripping","contradictory statements"]):
        return "indignant reproach"

    # 9. Confrontational
    if anger > 0.6 \
       and lex_counts["anger"] > 0 \
       and patterns:
        return "confrontational"

    # 10. Passive Aggression
    if neutral > 0.6 \
       and lex_counts["disgust"] > 0 \
       and any(p in patterns for p in ["dismissiveness","projection"]):
        return "passive aggression"

    # 11. Sarcastic Mockery
    if joy > 0.3 \
       and lex_counts["joy"] > 0 \
       and "insults" in patterns:
        return "sarcastic mockery"

    # 12. Menacing Threat
    if fear > 0.3 \
       and lex_counts["fear"] > 0 \
       and "threat" in patterns:
        return "menacing threat"

    # 13. Pleading Concern
    if sadness > 0.3 \
       and lex_counts["sadness"] > 0 \
       and any(k in text_lower for k in APOLOGY_KEYWORDS) \
       and not patterns:
        return "pleading concern"

    # 14. Fear-mongering
    if (fear + disgust) > 0.5 \
       and lex_counts["fear"] > 0 \
       and "projection" in patterns:
        return "fear-mongering"

    # 15. Disbelieving Accusation
    if surprise > 0.3 \
       and lex_counts["surprise"] > 0 \
       and "blame shifting" in patterns:
        return "disbelieving accusation"

    # 16. Empathetic Solidarity
    if joy > 0.2 and sadness > 0.2 \
       and lex_counts["joy"] > 0 and lex_counts["sadness"] > 0 \
       and not patterns:
        return "empathetic solidarity"

    # 17. Assertive Boundary
    if anger > 0.4 \
       and lex_counts["anger"] > 0 \
       and "control" in patterns:
        return "assertive boundary"

    # 18. Stonewalling
    if neutral > 0.7 \
       and lex_counts["disgust"] == 0 \
       and not patterns:
        return "stonewalling"

    return None
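
# Note: the tone rules above are evaluated top-down and the first match wins;
# None means no distinctive tone was detected for the message.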

# ——— 5) Single-message analysis ———————————————————————————————————————————
def analyze_message(text):
    text_lower = text.lower()
    emotion_profile = get_emotion_profile(text)

    # blend in NRC-EmoLex
    lex_counts = score_emolex(text_lower)
    max_lex    = max(lex_counts.values()) or 1.0
    lex_scores = {emo: cnt/max_lex for emo, cnt in lex_counts.items()}
    for emo in emotion_profile:
        emotion_profile[emo] = max(emotion_profile[emo], lex_scores.get(emo,0))

    # abuse-patterns
    toks   = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**toks).logits.squeeze(0)
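    # multilabel head: one independent sigmoid per label, thresholded per label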
    scores = torch.sigmoid(logits).cpu().numpy()
    active_patterns = [lab for lab, sc in zip(LABELS, scores) if sc >= THRESHOLDS[lab]]
    if any(k in text_lower for k in APOLOGY_KEYWORDS) and "recovery phase" not in active_patterns:
        active_patterns.append("recovery phase")

    tone_tag = get_emotional_tone_tag(emotion_profile, active_patterns, text_lower)
    return {
        "emotion_profile": emotion_profile,
        "active_patterns": active_patterns,
        "tone_tag": tone_tag
    }
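
# Illustrative return shape (actual values depend on the models and lexicons):
# analyze_message("I'm sorry, but you made me do it")
# -> {"emotion_profile": {"sadness": 0.7, ...},
#     "active_patterns": ["blame shifting", "recovery phase"],
#     "tone_tag": "performative regret"}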

# ——— 6) Composite wrapper ———————————————————————————————————————————————
def analyze_composite(uploaded_file, *texts):
    outputs = []

    # file handling / OCR
    if uploaded_file is not None:
        try:
            raw = uploaded_file.read()
        except AttributeError:
            # gr.File can hand back a filepath rather than a file-like object
            with open(uploaded_file, "rb") as f:
                raw = f.read()

        name = uploaded_file.name.lower() if hasattr(uploaded_file,"name") else uploaded_file.lower()
        if name.endswith((".png",".jpg",".jpeg",".bmp",".gif",".tiff")):
            img     = Image.open(io.BytesIO(raw))
            arr     = np.array(img.convert("RGB"))
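            # detail=0 makes EasyOCR return plain strings (no boxes/confidences)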
            content = "\n".join(ocr_reader.readtext(arr, detail=0))
        else:
            try:
                content = raw.decode("utf-8")
            except UnicodeDecodeError:
                content = raw.decode("latin-1")

        r = analyze_message(content)
        outputs.append(
            "── Uploaded File ──\n"
            f"Emotion Profile : {r['emotion_profile']}\n"
            f"Active Patterns : {r['active_patterns']}\n"
            f"Emotional Tone  : {r['tone_tag']}\n"
        )

    # free-text messages
    for idx, txt in enumerate(texts, start=1):
        if not txt:
            continue
        r = analyze_message(txt)
        outputs.append(
            f"── Message {idx} ──\n"
            f"Emotion Profile : {r['emotion_profile']}\n"
            f"Active Patterns : {r['active_patterns']}\n"
            f"Emotional Tone  : {r['tone_tag']}\n"
        )

    if not outputs:
        return "Please enter at least one message."
    return "\n".join(outputs)

# ——— 7) Gradio interface ———————————————————————————————————————————————
message_inputs = [gr.Textbox(label="Message")]

iface = gr.Interface(
    fn=analyze_composite,
    inputs=[gr.File(file_types=[".txt",".png",".jpg",".jpeg"], label="Upload text or image")] + message_inputs,
    outputs=gr.Textbox(label="Analysis"),
    title="Tether Analyzer (extended tone tags)",
    description="Emotion profiling, pattern tags, and a wide set of nuanced tone categories—no abuse score or DARVO."
)

if __name__ == "__main__":
    iface.launch()