import io
import re

import gradio as gr
import torch
import easyocr
import numpy as np
import pandas as pd
from PIL import Image
from transformers import pipeline as hf_pipeline, AutoModelForSequenceClassification, AutoTokenizer

# ——— Load and preprocess NRC EmoLex ——————————————————————————————————
# Make sure this filename matches exactly what you’ve uploaded
EMOLEX_PATH = "NRC-Emotion-Lexicon-Wordlevel-v0.92.txt"

# Load the raw (word, emotion, flag) triples
emo_raw = pd.read_csv(
    EMOLEX_PATH,
    sep="\t",
    names=["word", "emotion", "flag"],
    comment="#",   # skip any commented lines
    header=None
)

# Pivot: word → { emotion: 0 or 1, … }
emo_df = (
    emo_raw
    .pivot(index="word", columns="emotion", values="flag")
    .fillna(0)
    .astype(int)
)

# Final lookup dict: EMOLEX["happy"]["joy"] == 1
EMOLEX = emo_df.to_dict(orient="index")


def score_emolex(text_lower):
    """Count how many EmoLex hits each emotion gets in the (lower-cased) text."""
    counts = {emo: 0 for emo in emo_df.columns}
    for tok in text_lower.split():
        if tok in EMOLEX:
            for emo, flag in EMOLEX[tok].items():
                counts[emo] += flag
    return counts


# ——— Load MPQA Subjectivity Lexicon —————————————————————————————————————————————
MPQA_PATH = "subjclueslen1-HLTEMNLP05.tff"

# mpqa_lex[word] = list of feature-dicts for that word
mpqa_lex = {}
with open(MPQA_PATH, encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        # each line looks like:
        # type=strongsubj len=1 word1=abandon pos1=verb stemmed1=y priorpolarity=negative
        fields = dict(item.split("=", 1) for item in line.split())
        w = fields.pop("word1").lower()
        mpqa_lex.setdefault(w, []).append(fields)
# e.g. mpqa_lex["abandon"] == [{'type': 'strongsubj', 'len': '1', 'pos1': 'verb',
#                               'stemmed1': 'y', 'priorpolarity': 'negative'}]


def score_mpqa(text_lower):
    """Count MPQA subjectivity/polarity hits for the (lower-cased) text.

    Call this inside get_emotional_tone_tag, just after you split words, if you
    want rules that also look at subjectivity strength or prior polarity.
    """
    mpqa_counts = {
        "strongsubj": 0,
        "weaksubj": 0,
        "positive": 0,
        "negative": 0,
    }
    for w in text_lower.split():
        for entry in mpqa_lex.get(w, []):
            # .get() guards against prior polarities beyond positive/negative
            # (the lexicon also contains e.g. neutral entries)
            mpqa_counts[entry["type"]] = mpqa_counts.get(entry["type"], 0) + 1
            mpqa_counts[entry["priorpolarity"]] = mpqa_counts.get(entry["priorpolarity"], 0) + 1
    # now you can reference mpqa_counts["negative"], mpqa_counts["strongsubj"], etc.
    return mpqa_counts

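# A minimal, hedged usage sketch for the two lexicon scorers above: the helper name
# and the sample sentence are invented for illustration, exact counts depend on the
# lexicon files actually on disk, and nothing calls this automatically.
def _demo_lexicon_scores():
    sample = "you always abandon me and i hate it"
    emolex_counts = score_emolex(sample)   # one integer count per EmoLex column
    mpqa_counts = score_mpqa(sample)       # strongsubj/weaksubj + prior-polarity counts
    print("EmoLex hits:", {k: v for k, v in emolex_counts.items() if v})
    print("MPQA hits  :", {k: v for k, v in mpqa_counts.items() if v})
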
# For example, you could tweak the “Emotional Threat” rule inside get_emotional_tone_tag
# to also require at least one strong-subjectivity hit from MPQA:
#
#     mpqa_counts = score_mpqa(text_lower)
#     if (anger + disgust) > 0.5 \
#        and (lex_counts["anger"] > 0 or lex_counts["disgust"] > 0) \
#        and mpqa_counts["strongsubj"] > 0 \
#        and any(p in patterns for p in ["control", "threat", "insults", "dismissiveness"]):
#         return "emotional threat"

# ——— 1) Emotion Pipeline ————————————————————————————————————————————————
emotion_pipeline = hf_pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=None,
    truncation=True
)


def get_emotion_profile(text):
    """Return {emotion: score} from the transformer emotion classifier."""
    results = emotion_pipeline(text)
    if isinstance(results, list) and isinstance(results[0], list):
        results = results[0]
    return {r["label"].lower(): round(r["score"], 3) for r in results}


# apology keywords for pleading concern
APOLOGY_KEYWORDS = ["sorry", "apolog", "forgive"]

# ——— 2) Abuse-Patterns Model ——————————————————————————————————————————————
model_name = "SamanthaStorm/tether-multilabel-v3"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

LABELS = [
    "blame shifting", "contradictory statements", "control", "dismissiveness",
    "gaslighting", "guilt tripping", "insults", "obscure language",
    "projection", "recovery phase", "threat"
]

THRESHOLDS = {
    "blame shifting": 0.28,
    "contradictory statements": 0.27,
    "control": 0.08,
    "dismissiveness": 0.32,
    "gaslighting": 0.27,
    "guilt tripping": 0.31,
    "insults": 0.10,
    "obscure language": 0.55,
    "projection": 0.09,
    "recovery phase": 0.33,
    "threat": 0.15
}

# ——— 3) Initialize EasyOCR reader ————————————————————————————————————————————
ocr_reader = easyocr.Reader(["en"], gpu=False)


# ——— 4) Emotional-Tone Tagging —————————————————————————————————————————————
def get_emotional_tone_tag(emotion_profile, patterns, text_lower):
    """
    Assigns one of 18 nuanced tone categories based on model scores,
    NRC-EmoLex counts, detected patterns, and the raw text.
    """
    # unpack model emotion scores
    sadness  = emotion_profile.get("sadness", 0)
    joy      = emotion_profile.get("joy", 0)
    neutral  = emotion_profile.get("neutral", 0)
    disgust  = emotion_profile.get("disgust", 0)
    anger    = emotion_profile.get("anger", 0)
    fear     = emotion_profile.get("fear", 0)
    surprise = emotion_profile.get("surprise", 0)

    # count lexicon hits per emotion ("surprise" included so rule 15 can use it)
    words = text_lower.split()
    lex_counts = {
        emo: sum(EMOLEX.get(w, {}).get(emo, 0) for w in words)
        for emo in ["anger", "joy", "sadness", "fear", "disgust", "surprise"]
    }

    # Rules are checked in order; the first rule whose conditions all hold wins.

    # 0. Support override
    if lex_counts["joy"] > 0 and any(k in text_lower for k in ["support", "hope", "grace"]):
        return "supportive"

    # 1. Performative Regret
    if sadness > 0.4 \
       and (lex_counts["sadness"] > 0
            or any(p in patterns for p in ["blame shifting", "guilt tripping", "recovery phase"])):
        return "performative regret"

    # 2. Coercive Warmth
    if (joy > 0.3 or sadness > 0.4) \
       and (lex_counts["joy"] > 0 or lex_counts["sadness"] > 0) \
       and any(p in patterns for p in ["control", "gaslighting"]):
        return "coercive warmth"

    # 3. Cold Invalidation
    if (neutral + disgust) > 0.5 \
       and lex_counts["disgust"] > 0 \
       and any(p in patterns for p in ["dismissiveness", "projection", "obscure language"]):
        return "cold invalidation"

    # 4. Genuine Vulnerability
    if (sadness + fear) > 0.5 \
       and lex_counts["sadness"] > 0 and lex_counts["fear"] > 0 \
       and all(p == "recovery phase" for p in patterns):
        return "genuine vulnerability"

    # 5. Emotional Threat
    if (anger + disgust) > 0.5 \
       and (lex_counts["anger"] > 0 or lex_counts["disgust"] > 0) \
       and any(p in patterns for p in ["control", "threat", "insults", "dismissiveness"]):
        return "emotional threat"

    # 6. Weaponized Sadness
    if sadness > 0.6 \
       and lex_counts["sadness"] > 0 \
       and any(p in patterns for p in ["guilt tripping", "projection"]):
        return "weaponized sadness"

    # 7. Toxic Resignation
    if neutral > 0.5 \
       and any(p in patterns for p in ["dismissiveness", "obscure language"]) \
       and lex_counts["disgust"] == 0:
        return "toxic resignation"

    # 8. Indignant Reproach
    if anger > 0.5 \
       and lex_counts["anger"] > 0 \
       and any(p in patterns for p in ["guilt tripping", "contradictory statements"]):
        return "indignant reproach"

    # 9. Confrontational
    if anger > 0.6 \
       and lex_counts["anger"] > 0 \
       and patterns:
        return "confrontational"

    # 10. Passive Aggression
    if neutral > 0.6 \
       and lex_counts["disgust"] > 0 \
       and any(p in patterns for p in ["dismissiveness", "projection"]):
        return "passive aggression"

    # 11. Sarcastic Mockery
    if joy > 0.3 \
       and lex_counts["joy"] > 0 \
       and "insults" in patterns:
        return "sarcastic mockery"

    # 12. Menacing Threat
    if fear > 0.3 \
       and lex_counts["fear"] > 0 \
       and "threat" in patterns:
        return "menacing threat"

    # 13. Pleading Concern
    if sadness > 0.3 \
       and lex_counts["sadness"] > 0 \
       and any(k in text_lower for k in APOLOGY_KEYWORDS) \
       and not patterns:
        return "pleading concern"

    # 14. Fear-mongering
    if (fear + disgust) > 0.5 \
       and lex_counts["fear"] > 0 \
       and "projection" in patterns:
        return "fear-mongering"

    # 15. Disbelieving Accusation
    if surprise > 0.3 \
       and lex_counts["surprise"] > 0 \
       and "blame shifting" in patterns:
        return "disbelieving accusation"

    # 16. Empathetic Solidarity
    if joy > 0.2 and sadness > 0.2 \
       and lex_counts["joy"] > 0 and lex_counts["sadness"] > 0 \
       and not patterns:
        return "empathetic solidarity"

    # 17. Assertive Boundary
    if anger > 0.4 \
       and lex_counts["anger"] > 0 \
       and "control" in patterns:
        return "assertive boundary"

    # 18. Stonewalling
    if neutral > 0.7 \
       and lex_counts["disgust"] == 0 \
       and not patterns:
        return "stonewalling"

    return None


# ——— 5) Single message analysis ———————————————————————————————————————————
def analyze_message(text):
    text_lower = text.lower()
    emotion_profile = get_emotion_profile(text)

    # 2a. get lexicon counts
    lex_counts = score_emolex(text_lower)
    max_lex = max(lex_counts.values()) or 1.0   # avoid division by zero

    # 2b. normalize them to [0, 1]
    lex_scores = {emo: cnt / max_lex for emo, cnt in lex_counts.items()}
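    # Illustrative example of the normalization above (numbers invented): if
    # lex_counts were {"anger": 2, "sadness": 1, "joy": 0, ...}, then max_lex == 2
    # and lex_scores becomes {"anger": 1.0, "sadness": 0.5, "joy": 0.0, ...}.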
    # 2c. blend: take the max of transformer & lexicon scores
    for emo in emotion_profile:
        emotion_profile[emo] = max(emotion_profile[emo], lex_scores.get(emo, 0))

    toks = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**toks).logits.squeeze(0)
    scores = torch.sigmoid(logits).cpu().numpy()
    active_patterns = [label for label, prob in zip(LABELS, scores) if prob >= THRESHOLDS[label]]
    if any(k in text_lower for k in APOLOGY_KEYWORDS) and "recovery phase" not in active_patterns:
        active_patterns.append("recovery phase")

    tone_tag = get_emotional_tone_tag(emotion_profile, active_patterns, text_lower)
    return {
        "emotion_profile": emotion_profile,
        "active_patterns": active_patterns,
        "tone_tag": tone_tag
    }


# ——— 6) Composite wrapper ———————————————————————————————————————————————
def analyze_composite(uploaded_file, *texts):
    outputs = []

    if uploaded_file is not None:
        # gr.File may hand us a file-like object or a plain path, depending on Gradio version
        try:
            raw = uploaded_file.read()
        except Exception:
            with open(uploaded_file, "rb") as f:
                raw = f.read()

        name = (
            uploaded_file.name.lower()
            if hasattr(uploaded_file, "name")
            else uploaded_file.lower()
        )

        if name.endswith((".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif")):
            # image upload → run OCR, then analyze the extracted text
            img = Image.open(io.BytesIO(raw))
            arr = np.array(img.convert("RGB"))
            texts_ocr = ocr_reader.readtext(arr, detail=0)
            content = "\n".join(texts_ocr)
        else:
            try:
                content = raw.decode("utf-8")
            except UnicodeDecodeError:
                content = raw.decode("latin-1")

        r = analyze_message(content)
        outputs.append(
            "── Uploaded File ──\n"
            f"Emotion Profile : {r['emotion_profile']}\n"
            f"Active Patterns : {r['active_patterns']}\n"
            f"Emotional Tone  : {r['tone_tag']}\n"
        )

    for idx, txt in enumerate(texts, start=1):
        if not txt:
            continue
        r = analyze_message(txt)
        outputs.append(
            f"── Message {idx} ──\n"
            f"Emotion Profile : {r['emotion_profile']}\n"
            f"Active Patterns : {r['active_patterns']}\n"
            f"Emotional Tone  : {r['tone_tag']}\n"
        )

    if not outputs:
        return "Please enter at least one message."
    return "\n".join(outputs)


# ——— 7) Gradio interface ———————————————————————————————————————————————
message_inputs = [gr.Textbox(label="Message")]

iface = gr.Interface(
    fn=analyze_composite,
    inputs=[gr.File(file_types=[".txt", ".png", ".jpg", ".jpeg"], label="Upload text or image")] + message_inputs,
    outputs=gr.Textbox(label="Analysis"),
    title="Tether Analyzer (extended tone tags)",
    description="Emotion profiling, pattern tags, and a wide set of nuanced tone categories—no abuse score or DARVO."
)

if __name__ == "__main__":
    iface.launch()
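# A hedged sketch of what one analysis call produces. The field names come from
# analyze_message above; the sample text, the scores, and the fired patterns are
# invented purely for illustration.
#
#     analyze_message("i'm sorry, i never meant to hurt you")
#     # → {
#     #      "emotion_profile": {"sadness": 0.52, "joy": 0.04, ...},  # blended model + lexicon scores
#     #      "active_patterns": ["recovery phase"],                   # labels whose sigmoid score cleared THRESHOLDS
#     #      "tone_tag": "performative regret",                       # or None if no rule matches
#     #    }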