import gradio as gr
import torch
from transformers import pipeline as hf_pipeline, AutoModelForSequenceClassification, AutoTokenizer
from PIL import Image
import io
import easyocr
import numpy as np
import pandas as pd
# ——— Load and preprocess NRC EmoLex ——————————————————————————————————
# Make sure this filename matches exactly what you’ve uploaded
EMOLEX_PATH = "NRC-Emotion-Lexicon-Wordlevel-v0.92.txt"
# Load the raw triples
emo_raw = pd.read_csv(
    EMOLEX_PATH,
    sep="\t",
    names=["word", "emotion", "flag"],
    comment="#",  # skip any commented lines
    header=None
)
# Pivot: word → { emotion: 0 or 1, … }
emo_df = (
    emo_raw
    .pivot(index="word", columns="emotion", values="flag")
    .fillna(0)
    .astype(int)
)
# Final lookup dict: EMOLEX["happy"]["joy"] == 1
EMOLEX = emo_df.to_dict(orient="index")
def score_emolex(text_lower):
    # count how many times each emotion appears in the lexicon
    counts = {emo: 0 for emo in emo_df.columns}
    for tok in text_lower.split():
        if tok in EMOLEX:
            for emo, flag in EMOLEX[tok].items():
                counts[emo] += flag
    return counts
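# Illustrative sketch of how the lexicon counts behave (assumes "happy" is
# flagged for "joy" in the loaded lexicon, as in the EMOLEX example above):
#   score_emolex("so happy happy today")["joy"] == 2
# Tokens that are not in EMOLEX simply contribute nothing.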
# ——— 1) Emotion Pipeline ————————————————————————————————————————————————
emotion_pipeline = hf_pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=None,
    truncation=True
)
def get_emotion_profile(text):
    results = emotion_pipeline(text)
    if isinstance(results, list) and isinstance(results[0], list):
        results = results[0]
    return {r["label"].lower(): round(r["score"], 3) for r in results}
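# Expected shape of the result (scores below are hypothetical): the model
# emits one score per emotion class, e.g.
#   {"anger": 0.01, "disgust": 0.02, "fear": 0.05, "joy": 0.03,
#    "neutral": 0.10, "sadness": 0.75, "surprise": 0.04}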
# apology keywords for pleading concern
APOLOGY_KEYWORDS = ["sorry", "apolog", "forgive"]
# ——— 2) Abuse-Patterns Model ——————————————————————————————————————————————
model_name = "SamanthaStorm/tether-multilabel-v3"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
LABELS = [
    "blame shifting", "contradictory statements", "control", "dismissiveness",
    "gaslighting", "guilt tripping", "insults", "obscure language",
    "projection", "recovery phase", "threat"
]
THRESHOLDS = {
    "blame shifting": 0.28,
    "contradictory statements": 0.27,
    "control": 0.08,
    "dismissiveness": 0.32,
    "gaslighting": 0.27,
    "guilt tripping": 0.31,
    "insults": 0.10,
    "obscure language": 0.55,
    "projection": 0.09,
    "recovery phase": 0.33,
    "threat": 0.15
}
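# How these thresholds are used (see analyze_message below): a pattern label is
# reported as "active" when its sigmoid probability meets or exceeds its
# per-label threshold. For example, a hypothetical score of 0.12 for "insults"
# (threshold 0.10) would mark that pattern as active.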
# ——— 3) Initialize EasyOCR reader ————————————————————————————————————————————
ocr_reader = easyocr.Reader(["en"], gpu=False)
# ——— 4) Emotional-Tone Tagging —————————————————————————————————————————————
def get_emotional_tone_tag(emotion_profile, patterns, text_lower):
    """
    Assigns one of 18 nuanced tone categories (plus a "supportive" override)
    based on emotion scores, detected patterns, and the raw text.
    Returns None if no rule matches.
    """
    # unpack all emotion scores before any rules
    sadness  = emotion_profile.get("sadness", 0)
    joy      = emotion_profile.get("joy", 0)
    neutral  = emotion_profile.get("neutral", 0)
    disgust  = emotion_profile.get("disgust", 0)
    anger    = emotion_profile.get("anger", 0)
    fear     = emotion_profile.get("fear", 0)
    surprise = emotion_profile.get("surprise", 0)
    # 0. Support override
    if any(k in text_lower for k in ["support", "hope", "grace"]):
        return "supportive"
    # 1. Performative Regret
    if sadness > 0.4 and any(p in patterns for p in ["blame shifting", "guilt tripping", "recovery phase"]):
        return "performative regret"
    # 2. Coercive Warmth
    if (joy > 0.3 or sadness > 0.4) and any(p in patterns for p in ["control", "gaslighting"]):
        return "coercive warmth"
    # 3. Cold Invalidation
    if (neutral + disgust) > 0.5 and any(p in patterns for p in ["dismissiveness", "projection", "obscure language"]):
        return "cold invalidation"
    # 4. Genuine Vulnerability
    if (sadness + fear) > 0.5 and all(p == "recovery phase" for p in patterns):
        return "genuine vulnerability"
    # 5. Emotional Threat
    if (anger + disgust) > 0.5 and any(p in patterns for p in ["control", "threat", "insults", "dismissiveness"]):
        return "emotional threat"
    # 6. Weaponized Sadness
    if sadness > 0.6 and any(p in patterns for p in ["guilt tripping", "projection"]):
        return "weaponized sadness"
    # 7. Toxic Resignation
    if neutral > 0.5 and any(p in patterns for p in ["dismissiveness", "obscure language"]):
        return "toxic resignation"
    # 8. Indignant Reproach
    if anger > 0.5 and any(p in patterns for p in ["guilt tripping", "contradictory statements"]):
        return "indignant reproach"
    # 9. Confrontational
    if anger > 0.6 and patterns:
        return "confrontational"
    # 10. Passive Aggression
    if neutral > 0.6 and any(p in patterns for p in ["dismissiveness", "projection"]):
        return "passive aggression"
    # 11. Sarcastic Mockery
    if joy > 0.3 and "insults" in patterns:
        return "sarcastic mockery"
    # 12. Menacing Threat
    if fear > 0.3 and "threat" in patterns:
        return "menacing threat"
    # 13. Pleading Concern
    if sadness > 0.3 and any(k in text_lower for k in APOLOGY_KEYWORDS) and not patterns:
        return "pleading concern"
    # 14. Fear-mongering
    if (fear + disgust) > 0.5 and "projection" in patterns:
        return "fear-mongering"
    # 15. Disbelieving Accusation
    if surprise > 0.3 and "blame shifting" in patterns:
        return "disbelieving accusation"
    # 16. Empathetic Solidarity
    if joy > 0.2 and sadness > 0.2 and not patterns:
        return "empathetic solidarity"
    # 17. Assertive Boundary
    if anger > 0.4 and "control" in patterns:
        return "assertive boundary"
    # 18. Stonewalling
    if neutral > 0.7 and not patterns:
        return "stonewalling"
    return None
# ——— 5) Single message analysis ———————————————————————————————————————————
def analyze_message(text):
    text_lower = text.lower()
    emotion_profile = get_emotion_profile(text)
    # 2a. get lexicon counts
    lex_counts = score_emolex(text_lower)
    max_lex = max(lex_counts.values()) or 1.0  # avoid div0
    # 2b. normalize them to [0,1]
    lex_scores = {emo: cnt / max_lex for emo, cnt in lex_counts.items()}
    # 2c. blend: take the max of transformer & lexicon
    for emo in emotion_profile:
        emotion_profile[emo] = max(emotion_profile[emo], lex_scores.get(emo, 0))
    toks = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**toks).logits.squeeze(0)
    scores = torch.sigmoid(logits).cpu().numpy()
    active_patterns = [label for label, prob in zip(LABELS, scores) if prob >= THRESHOLDS[label]]
    if any(k in text_lower for k in APOLOGY_KEYWORDS) and "recovery phase" not in active_patterns:
        active_patterns.append("recovery phase")
    tone_tag = get_emotional_tone_tag(emotion_profile, active_patterns, text_lower)
    return {"emotion_profile": emotion_profile, "active_patterns": active_patterns, "tone_tag": tone_tag}
# ——— 6) Composite wrapper ———————————————————————————————————————————————
def analyze_composite(uploaded_file, *texts):
    outputs = []
    if uploaded_file is not None:
        try:
            raw = uploaded_file.read()
        except Exception:
            with open(uploaded_file, "rb") as f:
                raw = f.read()
        name = (
            uploaded_file.name.lower() if hasattr(uploaded_file, "name") else uploaded_file.lower()
        )
        if name.endswith((".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif")):
            img = Image.open(io.BytesIO(raw))
            arr = np.array(img.convert("RGB"))
            texts_ocr = ocr_reader.readtext(arr, detail=0)
            content = "\n".join(texts_ocr)
        else:
            try:
                content = raw.decode("utf-8")
            except UnicodeDecodeError:
                content = raw.decode("latin-1")
        r = analyze_message(content)
        outputs.append(
            "── Uploaded File ──\n"
            f"Emotion Profile : {r['emotion_profile']}\n"
            f"Active Patterns : {r['active_patterns']}\n"
            f"Emotional Tone  : {r['tone_tag']}\n"
        )
    for idx, txt in enumerate(texts, start=1):
        if not txt:
            continue
        r = analyze_message(txt)
        outputs.append(
            f"── Message {idx} ──\n"
            f"Emotion Profile : {r['emotion_profile']}\n"
            f"Active Patterns : {r['active_patterns']}\n"
            f"Emotional Tone  : {r['tone_tag']}\n"
        )
    if not outputs:
        return "Please enter at least one message."
    return "\n".join(outputs)
# ——— 7) Gradio interface ———————————————————————————————————————————————
message_inputs = [gr.Textbox(label="Message")]
iface = gr.Interface(
    fn=analyze_composite,
    inputs=[gr.File(file_types=[".txt", ".png", ".jpg", ".jpeg"], label="Upload text or image")] + message_inputs,
    outputs=gr.Textbox(label="Analysis"),
    title="Tether Analyzer (extended tone tags)",
    description="Emotion profiling, pattern tags, and a wide set of nuanced tone categories—no abuse score or DARVO."
)
if __name__ == "__main__":
iface.launch()