import gradio as gr
import torch
from transformers import pipeline as hf_pipeline, AutoModelForSequenceClassification, AutoTokenizer
from PIL import Image
import io
import easyocr
import numpy as np
import pandas as pd

# ——— Load and preprocess NRC EmoLex ——————————————————————————————————
# The lexicon is read as headerless, tab-separated (word, emotion, flag) rows;
# lines starting with '#' are ignored.
# NOTE(review): pandas' default NA parsing also applies to the word column, so
# a lexicon word spelled like an NA marker (e.g. "null") would be read as
# missing rather than as text — confirm against the actual file.
EMOLEX_PATH = "NRC-Emotion-Lexicon-Wordlevel-v0.92.txt"
emo_raw = pd.read_csv(
    EMOLEX_PATH,
    sep="\t",
    header=None,
    comment="#",
    names=["word", "emotion", "flag"],
)

# Reshape to one row per word and one column per emotion; word/emotion pairs
# with no value after the pivot are filled with 0 before the cast to int.
emo_df = emo_raw.pivot(index="word", columns="emotion", values="flag")
emo_df = emo_df.fillna(0).astype(int)

# Nested mapping {word: {emotion: flag}} used for per-token lookups.
EMOLEX = emo_df.to_dict(orient="index")

def score_emolex(text_lower):
    """Sum the EmoLex flags of every whitespace-separated token in the text.

    Args:
        text_lower: Message text; tokens are looked up in ``EMOLEX`` exactly
            as they appear after ``str.split()``.

    Returns:
        Dict with one key per column of ``emo_df``; each value is the total
        of that column's flags over all tokens found in ``EMOLEX``.
    """
    totals = dict.fromkeys(emo_df.columns, 0)
    # Tokens that are not lexicon entries contribute nothing.
    matched_rows = (EMOLEX[token] for token in text_lower.split() if token in EMOLEX)
    for row in matched_rows:
        for emotion, flag in row.items():
            totals[emotion] += flag
    return totals

# ——— Load MPQA Subjectivity Lexicon —————————————————————————————————————————————
MPQA_PATH = "subjclueslen1-HLTEMNLP05.tff"
# Lowercased word -> list of attribute dicts, one per lexicon line that names
# that word (the "word1" attribute itself is removed from each dict).
mpqa_lex = {}
with open(MPQA_PATH, encoding="utf-8") as lexicon_file:
    for raw_line in lexicon_file:
        record = raw_line.strip()
        # Blank lines and '#' comment lines carry no entry.
        if record == "" or record.startswith("#"):
            continue

        # Keep only key=value tokens, splitting on the first '='; when a key
        # repeats on one line the last value wins.
        attrs = dict(
            token.split("=", 1) for token in record.split() if "=" in token
        )

        # Lines without a word1 attribute cannot be indexed; skip them.
        word = attrs.pop("word1", None)
        if word is None:
            continue
        mpqa_lex.setdefault(word.lower(), []).append(attrs)

# ——— 1) Emotion Pipeline ————————————————————————————————————————————————
# Text-classification pipeline used by get_emotion_profile.
# NOTE(review): top_k=None is presumably what makes the pipeline report a
# score for every label instead of only the best one — confirm against the
# transformers pipeline documentation.
emotion_pipeline = hf_pipeline(
    task="text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    truncation=True,
    top_k=None,
)
def get_emotion_profile(text):
    """Classify ``text`` with ``emotion_pipeline`` and return ``{label: score}``.

    Labels are lowercased and scores are rounded to three decimals.
    """
    raw = emotion_pipeline(text)
    # When the output is a list whose first element is itself a list, the
    # per-label entries are in that inner list.
    is_nested = isinstance(raw, list) and isinstance(raw[0], list)
    entries = raw[0] if is_nested else raw

    profile = {}
    for entry in entries:
        profile[entry["label"].lower()] = round(entry["score"], 3)
    return profile

# Substrings searched for in the lowercased message text (plain `in` checks,
# so they also match inside longer words). analyze_message uses them to add
# the "recovery phase" pattern; the tone tagger uses them for "pleading concern".
APOLOGY_KEYWORDS = ["sorry", "apology", "forgive"]

# ——— 2) Abuse-Patterns Model ——————————————————————————————————————————————
# Model and tokenizer are loaded once at import time and shared by every call
# to analyze_message.
model_name = "SamanthaStorm/tether-multilabel-v3"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

# analyze_message pairs these names positionally (via zip) with the model's
# output scores, so the order here matters.
# NOTE(review): assumed to match the model's output index order — confirm
# against the model's config.
LABELS = [
    "blame shifting", "contradictory statements", "control", "dismissiveness",
    "gaslighting", "guilt tripping", "insults", "obscure language",
    "projection", "recovery phase", "threat"
]
# Per-label cut-off: a label is reported as active when its sigmoid score is
# greater than or equal to the value listed here.
THRESHOLDS = {
    "blame shifting": 0.28,
    "contradictory statements": 0.27,
    "control": 0.08,
    "dismissiveness": 0.32,
    "gaslighting": 0.27,
    "guilt tripping": 0.31,
    "insults": 0.10,
    "obscure language": 0.55,
    "projection": 0.09,
    "recovery phase": 0.33,
    "threat": 0.15
}

# ——— 3) Initialize EasyOCR reader ————————————————————————————————————————————
# Shared OCR reader (English, GPU disabled) used by analyze_composite to pull
# text out of uploaded images.
ocr_reader = easyocr.Reader(["en"], gpu=False)

# ——— 4) Emotional-Tone Tagging —————————————————————————————————————————————
def get_emotional_tone_tag(emotion_profile, patterns, text_lower):
    """Return the first matching tone tag for a message, or ``None``.

    The rules below are tried top to bottom and the first match wins, so
    their order is part of the behaviour.  Each rule combines up to three
    signals: emotion scores from ``emotion_profile``, NRC EmoLex hits among
    the whitespace-separated tokens of ``text_lower``, and the labels in
    ``patterns``.

    Args:
        emotion_profile: Mapping of lowercase emotion label to a numeric
            score; labels that are absent count as 0.
        patterns: Collection of active pattern labels (may be empty).
        text_lower: The message text, already lowercased by the caller.

    Returns:
        The tone tag string, or ``None`` when no rule matches.
    """
    sadness = emotion_profile.get("sadness", 0)
    joy = emotion_profile.get("joy", 0)
    neutral = emotion_profile.get("neutral", 0)
    disgust = emotion_profile.get("disgust", 0)
    anger = emotion_profile.get("anger", 0)
    fear = emotion_profile.get("fear", 0)

    # NRC EmoLex hits per emotion.  Tokens come from a plain whitespace split,
    # so a token with punctuation attached ("sorry,") is looked up as that
    # exact string.
    words = text_lower.split()
    lex_counts = {
        emo: sum(EMOLEX.get(w, {}).get(emo, 0) for w in words)
        for emo in ("anger", "joy", "sadness", "fear", "disgust")
    }

    # NOTE: none of the rules use the MPQA lexicon, so it is not queried
    # here.  Indexing a fixed counter by an entry's raw "type" or
    # "priorpolarity" value would raise KeyError for any value outside the
    # counter's keys (or for an entry missing one of those fields).

    def has_any(*names):
        """True when at least one of ``names`` is among the active patterns."""
        return any(name in patterns for name in names)

    # 0. Support override
    if lex_counts["joy"] > 0 and any(
        k in text_lower for k in ("support", "hope", "grace")
    ):
        return "supportive"

    # 1. Performative Regret
    if sadness > 0.4 and has_any("blame shifting", "guilt tripping", "recovery phase"):
        return "performative regret"

    # 2. Coercive Warmth
    if (
        (joy > 0.3 or sadness > 0.4)
        and (lex_counts["joy"] > 0 or lex_counts["sadness"] > 0)
        and has_any("control", "gaslighting")
    ):
        return "coercive warmth"

    # 3. Cold Invalidation
    if (
        (neutral + disgust) > 0.5
        and lex_counts["disgust"] > 0
        and has_any("dismissiveness", "projection", "obscure language")
    ):
        return "cold invalidation"

    # 4. Genuine Vulnerability
    # all() over an empty pattern list is True, so this also matches when no
    # pattern is active.
    if (
        (sadness + fear) > 0.5
        and lex_counts["sadness"] > 0
        and lex_counts["fear"] > 0
        and all(p == "recovery phase" for p in patterns)
    ):
        return "genuine vulnerability"

    # 5. Emotional Threat
    if (
        (anger + disgust) > 0.5
        and (lex_counts["anger"] > 0 or lex_counts["disgust"] > 0)
        and has_any("control", "threat", "insults", "dismissiveness")
    ):
        return "emotional threat"

    # 6. Weaponized Sadness
    if (
        sadness > 0.6
        and lex_counts["sadness"] > 0
        and has_any("guilt tripping", "projection")
    ):
        return "weaponized sadness"

    # 7. Toxic Resignation
    if (
        neutral > 0.5
        and has_any("dismissiveness", "obscure language")
        and lex_counts["disgust"] == 0
    ):
        return "toxic resignation"

    # 8. Indignant Reproach
    if (
        anger > 0.5
        and lex_counts["anger"] > 0
        and has_any("guilt tripping", "contradictory statements")
    ):
        return "indignant reproach"

    # 9. Confrontational
    if anger > 0.6 and lex_counts["anger"] > 0 and patterns:
        return "confrontational"

    # 10. Passive Aggression
    if (
        neutral > 0.6
        and lex_counts["disgust"] > 0
        and has_any("dismissiveness", "projection")
    ):
        return "passive aggression"

    # 11. Sarcastic Mockery
    if joy > 0.3 and lex_counts["joy"] > 0 and "insults" in patterns:
        return "sarcastic mockery"

    # 12. Menacing Threat
    if fear > 0.3 and lex_counts["fear"] > 0 and "threat" in patterns:
        return "menacing threat"

    # 13. Pleading Concern
    # NOTE(review): analyze_message appends "recovery phase" whenever an
    # apology keyword is present, so `not patterns` cannot hold on that path;
    # this rule only fires for callers that build `patterns` themselves.
    if (
        sadness > 0.3
        and lex_counts["sadness"] > 0
        and any(k in text_lower for k in APOLOGY_KEYWORDS)
        and not patterns
    ):
        return "pleading concern"

    # 14. Fear-mongering
    if (
        (fear + disgust) > 0.5
        and lex_counts["fear"] > 0
        and "projection" in patterns
    ):
        return "fear-mongering"

    # 16. Empathetic Solidarity
    if (
        joy > 0.2
        and sadness > 0.2
        and lex_counts["joy"] > 0
        and lex_counts["sadness"] > 0
        and not patterns
    ):
        return "empathetic solidarity"

    # 17. Assertive Boundary
    if anger > 0.4 and lex_counts["anger"] > 0 and "control" in patterns:
        return "assertive boundary"

    # 18. Stonewalling
    if neutral > 0.7 and lex_counts["disgust"] == 0 and not patterns:
        return "stonewalling"

    return None

# ——— 5) Single-message analysis ———————————————————————————————————————————
def analyze_message(text):
    """Analyse one message and return its emotions, patterns and tone tag.

    Args:
        text: The raw message text.

    Returns:
        Dict with keys ``"emotion_profile"`` (label -> score),
        ``"active_patterns"`` (list of pattern labels) and ``"tone_tag"``
        (string or ``None``).
    """
    lowered = text.lower()

    # Blend in NRC-EmoLex: counts are scaled by the largest count (or by 1.0
    # when every count is zero), and each model score is raised to the scaled
    # lexicon score for the same label when that is larger.
    lexicon_counts = score_emolex(lowered)
    peak = max(lexicon_counts.values()) or 1.0
    lexicon_scores = {
        label: count / peak for label, count in lexicon_counts.items()
    }
    emotion_profile = {
        label: max(model_score, lexicon_scores.get(label, 0))
        for label, model_score in get_emotion_profile(text).items()
    }

    # Abuse patterns: a sigmoid score per label, compared with that label's
    # threshold.
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**encoded).logits.squeeze(0)
    probabilities = torch.sigmoid(logits).cpu().numpy()

    active_patterns = []
    for label, probability in zip(LABELS, probabilities):
        if probability >= THRESHOLDS[label]:
            active_patterns.append(label)

    # Any apology keyword forces the "recovery phase" pattern on.
    mentions_apology = any(keyword in lowered for keyword in APOLOGY_KEYWORDS)
    if mentions_apology and "recovery phase" not in active_patterns:
        active_patterns.append("recovery phase")

    return {
        "emotion_profile": emotion_profile,
        "active_patterns": active_patterns,
        "tone_tag": get_emotional_tone_tag(emotion_profile, active_patterns, lowered),
    }

# ——— 6) Composite wrapper ———————————————————————————————————————————————
def _read_bytes(path):
    """Return the full contents of the file at ``path`` as bytes."""
    with open(path, "rb") as fh:
        return fh.read()


def _read_upload(uploaded_file):
    """Return ``(raw_bytes, lowercased_name)`` for an uploaded file.

    ``uploaded_file`` may be an open file-like object (anything with a
    callable ``read``) or a filesystem path.
    """
    display_name = getattr(uploaded_file, "name", uploaded_file)
    if callable(getattr(uploaded_file, "read", None)):
        try:
            raw = uploaded_file.read()
        except (OSError, ValueError):
            # Unreadable handle (e.g. already closed): reopen it by name.
            raw = _read_bytes(display_name)
    else:
        raw = _read_bytes(uploaded_file)
    if isinstance(raw, str):
        # A text-mode handle returns str; the callers below work on bytes.
        raw = raw.encode("utf-8")
    # str() so that path objects and other non-str names can be lowercased.
    return raw, str(display_name).lower()


def _extract_text(raw, name):
    """Turn uploaded bytes into text: OCR for images, decoding otherwise."""
    if name.endswith((".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tiff")):
        img = Image.open(io.BytesIO(raw))
        arr = np.array(img.convert("RGB"))
        return "\n".join(ocr_reader.readtext(arr, detail=0))
    try:
        return raw.decode("utf-8")
    except UnicodeDecodeError:
        # latin-1 maps every byte to a character, so this cannot fail.
        return raw.decode("latin-1")


def _format_report(title, result):
    """Render one analyze_message result under a ``── title ──`` heading."""
    return (
        f"── {title} ──\n"
        f"Emotion Profile : {result['emotion_profile']}\n"
        f"Active Patterns : {result['active_patterns']}\n"
        f"Emotional Tone  : {result['tone_tag']}\n"
    )


def analyze_composite(uploaded_file, *texts):
    """Analyse an optional uploaded file plus any number of text messages.

    Args:
        uploaded_file: ``None``, a filesystem path, or a file-like object
            with ``read`` (and optionally ``name``).  Files whose name ends
            in an image extension are run through OCR; anything else is
            decoded as UTF-8, falling back to latin-1.
        *texts: Message strings; falsy entries (``None``, ``""``) are
            skipped but still count towards the message numbering.

    Returns:
        One report per analysed item, joined with newlines, or a prompt
        asking for input when nothing was analysed.
    """
    outputs = []

    # file handling / OCR
    if uploaded_file is not None:
        raw, name = _read_upload(uploaded_file)
        content = _extract_text(raw, name)
        outputs.append(_format_report("Uploaded File", analyze_message(content)))

    # free-text messages
    for idx, txt in enumerate(texts, start=1):
        if not txt:
            continue
        outputs.append(_format_report(f"Message {idx}", analyze_message(txt)))

    if not outputs:
        return "Please enter at least one message."
    return "\n".join(outputs)

# ——— 7) Gradio interface ———————————————————————————————————————————————
# Inputs are the file component followed by the message boxes, matching the
# analyze_composite(uploaded_file, *texts) signature.
message_inputs = [gr.Textbox(label="Message")]
file_input = gr.File(
    label="Upload text or image",
    file_types=[".txt", ".png", ".jpg", ".jpeg"],
)

iface = gr.Interface(
    fn=analyze_composite,
    inputs=[file_input, *message_inputs],
    outputs=gr.Textbox(label="Analysis"),
    title="Tether Analyzer (extended tone tags)",
    description="Emotion profiling, pattern tags, and a wide set of nuanced tone categories—no abuse score or DARVO.",
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()