import gradio as gr
import torch
from transformers import pipeline as hf_pipeline, AutoModelForSequenceClassification, AutoTokenizer
from PIL import Image
import io
import easyocr
import numpy as np
import pandas as pd
# ——— Load and preprocess NRC EmoLex ——————————————————————————————————
# Make sure this filename matches exactly what you’ve uploaded
EMOLEX_PATH = "NRC-Emotion-Lexicon-Wordlevel-v0.92.txt"
# Load the raw triples
emo_raw = pd.read_csv(
    EMOLEX_PATH,
    sep="\t",
    names=["word", "emotion", "flag"],
    comment="#",  # skip any commented lines
    header=None
)
# Pivot: word → { emotion: 0 or 1, … }
emo_df = (
    emo_raw
    .pivot(index="word", columns="emotion", values="flag")
    .fillna(0)
    .astype(int)
)
# Final lookup dict: EMOLEX["happy"]["joy"] == 1
EMOLEX = emo_df.to_dict(orient="index")
def score_emolex(text_lower):
    # count how many times each emotion appears in the lexicon
    counts = {emo: 0 for emo in emo_df.columns}
    for tok in text_lower.split():
        if tok in EMOLEX:
            for emo, flag in EMOLEX[tok].items():
                counts[emo] += flag
    return counts
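# Usage sketch (illustrative; exact counts depend on the lexicon file):
#   score_emolex("i feel so happy")
#   -> {..., "joy": 1, "sadness": 0, ...}  # "happy" is flagged for joy (see EMOLEX example above)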
# ——— Load MPQA Subjectivity Lexicon —————————————————————————————————————————————
MPQA_PATH = "subjclueslen1-HLTEMNLP05.tff"
# mpqa_lex[word] = list of feature-dicts for that word
mpqa_lex = {}
with open(MPQA_PATH, encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        # each line looks like: type=strongsubj len=1 word1=abandon pos1=verb stemmed1=y priorpolarity=negative
        fields = dict(item.split("=", 1) for item in line.split())
        w = fields.pop("word1").lower()
        mpqa_lex.setdefault(w, []).append(fields)
# e.g. mpqa_lex["abandon"] == [{'type': 'strongsubj', 'len': '1', 'pos1': 'verb', 'stemmed1': 'y', 'priorpolarity': 'negative'}]
# ——— Score MPQA hits (called from get_emotional_tone_tag) ——————————————————————
def score_mpqa(text_lower):
    """Count subjectivity-type and prior-polarity hits for each word in the message."""
    mpqa_counts = {
        "strongsubj": 0,
        "weaksubj": 0,
        "positive": 0,
        "negative": 0,
        "neutral": 0,  # MPQA also marks neutral/both polarities; count them to avoid KeyError
        "both": 0,
    }
    for w in text_lower.split():
        for entry in mpqa_lex.get(w, []):
            mpqa_counts[entry["type"]] += 1
            mpqa_counts[entry["priorpolarity"]] += 1
    return mpqa_counts
# get_emotional_tone_tag references mpqa_counts["strongsubj"], mpqa_counts["negative"], etc.;
# the "emotional threat" rule below requires at least one strong-subjectivity hit.
# ——— 1) Emotion Pipeline ————————————————————————————————————————————————
emotion_pipeline = hf_pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=None,
    truncation=True
)
def get_emotion_profile(text):
    results = emotion_pipeline(text)
    if isinstance(results, list) and isinstance(results[0], list):
        results = results[0]
    return {r["label"].lower(): round(r["score"], 3) for r in results}
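# The model emits one score per label; a typical return value looks like
# (label set from j-hartmann/emotion-english-distilroberta-base; scores illustrative):
#   {"anger": 0.62, "disgust": 0.11, "fear": 0.05, "joy": 0.02,
#    "neutral": 0.08, "sadness": 0.10, "surprise": 0.02}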
# apology keywords for pleading concern
APOLOGY_KEYWORDS = ["sorry", "apolog", "forgive"]
# ——— 2) Abuse-Patterns Model ——————————————————————————————————————————————
model_name = "SamanthaStorm/tether-multilabel-v3"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
LABELS = [
    "blame shifting", "contradictory statements", "control", "dismissiveness",
    "gaslighting", "guilt tripping", "insults", "obscure language",
    "projection", "recovery phase", "threat"
]
THRESHOLDS = {
    "blame shifting": 0.28,
    "contradictory statements": 0.27,
    "control": 0.08,
    "dismissiveness": 0.32,
    "gaslighting": 0.27,
    "guilt tripping": 0.31,
    "insults": 0.10,
    "obscure language": 0.55,
    "projection": 0.09,
    "recovery phase": 0.33,
    "threat": 0.15
}
# ——— 3) Initialize EasyOCR reader ————————————————————————————————————————————
ocr_reader = easyocr.Reader(["en"], gpu=False)
# ——— 4) Emotional-Tone Tagging —————————————————————————————————————————————
def get_emotional_tone_tag(emotion_profile, patterns, text_lower):
    """
    Assigns one of 18 numbered tone categories (plus a "supportive" override)
    based on model scores, NRC-EmoLex counts, MPQA subjectivity counts,
    detected patterns, and the raw text.
    """
    # unpack model emotion scores
    sadness = emotion_profile.get("sadness", 0)
    joy = emotion_profile.get("joy", 0)
    neutral = emotion_profile.get("neutral", 0)
    disgust = emotion_profile.get("disgust", 0)
    anger = emotion_profile.get("anger", 0)
    fear = emotion_profile.get("fear", 0)
    surprise = emotion_profile.get("surprise", 0)
    # count lexicon hits for the emotions the rules below reference
    # (surprise included: rule 15 reads lex_counts["surprise"])
    words = text_lower.split()
    lex_counts = {
        emo: sum(EMOLEX.get(w, {}).get(emo, 0) for w in words)
        for emo in ["anger", "joy", "sadness", "fear", "disgust", "surprise"]
    }
    # count MPQA subjectivity/polarity hits (see score_mpqa above)
    mpqa_counts = score_mpqa(text_lower)
    # 0. Support override
    if lex_counts["joy"] > 0 and any(k in text_lower for k in ["support", "hope", "grace"]):
        return "supportive"
    # 1. Performative Regret
    if sadness > 0.4 \
       and (lex_counts["sadness"] > 0 or any(p in patterns for p in ["blame shifting", "guilt tripping", "recovery phase"])):
        return "performative regret"
    # 2. Coercive Warmth
    if (joy > 0.3 or sadness > 0.4) \
       and (lex_counts["joy"] > 0 or lex_counts["sadness"] > 0) \
       and any(p in patterns for p in ["control", "gaslighting"]):
        return "coercive warmth"
    # 3. Cold Invalidation
    if (neutral + disgust) > 0.5 \
       and lex_counts["disgust"] > 0 \
       and any(p in patterns for p in ["dismissiveness", "projection", "obscure language"]):
        return "cold invalidation"
    # 4. Genuine Vulnerability
    # (all() is vacuously true when no patterns were detected, so this can fire pattern-free)
    if (sadness + fear) > 0.5 \
       and lex_counts["sadness"] > 0 and lex_counts["fear"] > 0 \
       and all(p == "recovery phase" for p in patterns):
        return "genuine vulnerability"
    # 5. Emotional Threat (requires at least one strong-subjectivity MPQA hit)
    if (anger + disgust) > 0.5 \
       and (lex_counts["anger"] > 0 or lex_counts["disgust"] > 0) \
       and mpqa_counts["strongsubj"] > 0 \
       and any(p in patterns for p in ["control", "threat", "insults", "dismissiveness"]):
        return "emotional threat"
    # 6. Weaponized Sadness
    if sadness > 0.6 \
       and lex_counts["sadness"] > 0 \
       and any(p in patterns for p in ["guilt tripping", "projection"]):
        return "weaponized sadness"
    # 7. Toxic Resignation
    if neutral > 0.5 \
       and any(p in patterns for p in ["dismissiveness", "obscure language"]) \
       and lex_counts["disgust"] == 0:
        return "toxic resignation"
    # 8. Indignant Reproach
    if anger > 0.5 \
       and lex_counts["anger"] > 0 \
       and any(p in patterns for p in ["guilt tripping", "contradictory statements"]):
        return "indignant reproach"
    # 9. Confrontational
    if anger > 0.6 \
       and lex_counts["anger"] > 0 \
       and patterns:
        return "confrontational"
    # 10. Passive Aggression
    if neutral > 0.6 \
       and lex_counts["disgust"] > 0 \
       and any(p in patterns for p in ["dismissiveness", "projection"]):
        return "passive aggression"
    # 11. Sarcastic Mockery
    if joy > 0.3 \
       and lex_counts["joy"] > 0 \
       and "insults" in patterns:
        return "sarcastic mockery"
    # 12. Menacing Threat
    if fear > 0.3 \
       and lex_counts["fear"] > 0 \
       and "threat" in patterns:
        return "menacing threat"
    # 13. Pleading Concern
    if sadness > 0.3 \
       and lex_counts["sadness"] > 0 \
       and any(k in text_lower for k in APOLOGY_KEYWORDS) \
       and not patterns:
        return "pleading concern"
    # 14. Fear-mongering
    if (fear + disgust) > 0.5 \
       and lex_counts["fear"] > 0 \
       and "projection" in patterns:
        return "fear-mongering"
    # 15. Disbelieving Accusation
    if surprise > 0.3 \
       and lex_counts["surprise"] > 0 \
       and "blame shifting" in patterns:
        return "disbelieving accusation"
    # 16. Empathetic Solidarity
    if joy > 0.2 and sadness > 0.2 \
       and lex_counts["joy"] > 0 and lex_counts["sadness"] > 0 \
       and not patterns:
        return "empathetic solidarity"
    # 17. Assertive Boundary
    if anger > 0.4 \
       and lex_counts["anger"] > 0 \
       and "control" in patterns:
        return "assertive boundary"
    # 18. Stonewalling
    if neutral > 0.7 \
       and lex_counts["disgust"] == 0 \
       and not patterns:
        return "stonewalling"
    return None
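# Usage sketch (illustrative inputs, not real model output):
#   get_emotional_tone_tag({"sadness": 0.7}, ["guilt tripping"], "i am so sad")
#   -> "performative regret"  (rule 1: sadness > 0.4 plus a guilt-tripping pattern)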
# ——— 5) Single message analysis ———————————————————————————————————————————
def analyze_message(text):
    text_lower = text.lower()
    emotion_profile = get_emotion_profile(text)
    # 2a. get lexicon counts
    lex_counts = score_emolex(text_lower)
    max_lex = max(lex_counts.values()) or 1.0  # avoid division by zero
    # 2b. normalize them to [0, 1]
    lex_scores = {emo: cnt / max_lex for emo, cnt in lex_counts.items()}
    # 2c. blend: take the max of transformer & lexicon
    for emo in emotion_profile:
        emotion_profile[emo] = max(emotion_profile[emo], lex_scores.get(emo, 0))
    toks = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**toks).logits.squeeze(0)
    scores = torch.sigmoid(logits).cpu().numpy()
    active_patterns = [label for label, prob in zip(LABELS, scores) if prob >= THRESHOLDS[label]]
    if any(k in text_lower for k in APOLOGY_KEYWORDS) and "recovery phase" not in active_patterns:
        active_patterns.append("recovery phase")
    tone_tag = get_emotional_tone_tag(emotion_profile, active_patterns, text_lower)
    return {"emotion_profile": emotion_profile, "active_patterns": active_patterns, "tone_tag": tone_tag}
# ——— 6) Composite wrapper ———————————————————————————————————————————————
def analyze_composite(uploaded_file, *texts):
    outputs = []
    if uploaded_file is not None:
        # gr.File may hand us a file-like object or a plain path string, so try both
        try:
            raw = uploaded_file.read()
        except Exception:
            with open(uploaded_file, "rb") as f:
                raw = f.read()
        name = (
            uploaded_file.name.lower() if hasattr(uploaded_file, "name") else uploaded_file.lower()
        )
        if name.endswith((".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif")):
            img = Image.open(io.BytesIO(raw))
            arr = np.array(img.convert("RGB"))
            texts_ocr = ocr_reader.readtext(arr, detail=0)
            content = "\n".join(texts_ocr)
        else:
            try:
                content = raw.decode("utf-8")
            except UnicodeDecodeError:
                content = raw.decode("latin-1")
        r = analyze_message(content)
        outputs.append(
            "── Uploaded File ──\n"
            f"Emotion Profile : {r['emotion_profile']}\n"
            f"Active Patterns : {r['active_patterns']}\n"
            f"Emotional Tone : {r['tone_tag']}\n"
        )
    for idx, txt in enumerate(texts, start=1):
        if not txt:
            continue
        r = analyze_message(txt)
        outputs.append(
            f"── Message {idx} ──\n"
            f"Emotion Profile : {r['emotion_profile']}\n"
            f"Active Patterns : {r['active_patterns']}\n"
            f"Emotional Tone : {r['tone_tag']}\n"
        )
    if not outputs:
        return "Please enter at least one message."
    return "\n".join(outputs)
# ——— 7) Gradio interface ———————————————————————————————————————————————
message_inputs = [gr.Textbox(label="Message")]
iface = gr.Interface(
    fn=analyze_composite,
    inputs=[gr.File(file_types=[".txt", ".png", ".jpg", ".jpeg"], label="Upload text or image")] + message_inputs,
    outputs=gr.Textbox(label="Analysis"),
    title="Tether Analyzer (extended tone tags)",
    description="Emotion profiling, pattern tags, and a wide set of nuanced tone categories—no abuse score or DARVO."
)
if __name__ == "__main__":
    iface.launch()