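# Tether Analyzer: emotion profiling, abuse-pattern tagging, and nuanced
# emotional-tone labels for uploaded files (text or OCR'd images) and
# free-text messages.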
import gradio as gr
import torch
from transformers import pipeline as hf_pipeline, AutoModelForSequenceClassification, AutoTokenizer
from PIL import Image
import io
import easyocr
import numpy as np
import pandas as pd
# ——— Load and preprocess NRC EmoLex ——————————————————————————————————
EMOLEX_PATH = "NRC-Emotion-Lexicon-Wordlevel-v0.92.txt"
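# The word-level EmoLex file is tab-separated rows of word<TAB>emotion<TAB>flag;
# pivot it into one row per word with a 0/1 column per emotion.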
emo_raw = pd.read_csv(
EMOLEX_PATH,
sep="\t",
names=["word","emotion","flag"],
comment="#",
header=None
)
emo_df = (
emo_raw
.pivot(index="word", columns="emotion", values="flag")
.fillna(0)
.astype(int)
)
EMOLEX = emo_df.to_dict(orient="index")
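# EMOLEX maps word -> {emotion: 0/1}, e.g. EMOLEX.get("abandon", {}).get("sadness", 0)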
def score_emolex(text_lower):
    """Count EmoLex flags for each whitespace token of an already-lowercased text."""
counts = {emo: 0 for emo in emo_df.columns}
for tok in text_lower.split():
if tok in EMOLEX:
for emo, flag in EMOLEX[tok].items():
counts[emo] += flag
return counts
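# Illustrative shape (actual counts depend on the lexicon file):
#   score_emolex("i am so sad") -> {"anger": 0, ..., "sadness": 1, ...}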
# ——— Load MPQA Subjectivity Lexicon —————————————————————————————————————————————
MPQA_PATH = "subjclueslen1-HLTEMNLP05.tff"
mpqa_lex = {}
with open(MPQA_PATH, encoding="utf-8") as f:
for line in f:
line = line.strip()
if not line or line.startswith("#"):
continue
# build fields dict but skip any token without '='
fields = {}
for item in line.split():
if "=" not in item:
continue
key, val = item.split("=", 1)
fields[key] = val
# must have word1
if "word1" not in fields:
continue
w = fields.pop("word1").lower()
mpqa_lex.setdefault(w, []).append(fields)
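# Each mpqa_lex entry keeps the remaining .tff fields, typically
# {"type": "strongsubj"|"weaksubj", "priorpolarity": "positive"|"negative"|"neutral"|"both", ...}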
# ——— 1) Emotion Pipeline ————————————————————————————————————————————————
emotion_pipeline = hf_pipeline(
"text-classification",
model="j-hartmann/emotion-english-distilroberta-base",
top_k=None,
truncation=True
)
def get_emotion_profile(text):
results = emotion_pipeline(text)
if isinstance(results, list) and isinstance(results[0], list):
results = results[0]
return {r["label"].lower(): round(r["score"], 3) for r in results}
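# The model emits seven labels (anger, disgust, fear, joy, neutral, sadness,
# surprise); get_emotion_profile returns them as a {label: score} dict.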
APOLOGY_KEYWORDS = ["sorry", "apolog", "forgive"]
# ——— 2) Abuse-Patterns Model ——————————————————————————————————————————————
model_name = "SamanthaStorm/tether-multilabel-v3"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
LABELS = [
"blame shifting", "contradictory statements", "control", "dismissiveness",
"gaslighting", "guilt tripping", "insults", "obscure language",
"projection", "recovery phase", "threat"
]
THRESHOLDS = {
"blame shifting": 0.28,
"contradictory statements": 0.27,
"control": 0.08,
"dismissiveness": 0.32,
"gaslighting": 0.27,
"guilt tripping": 0.31,
"insults": 0.10,
"obscure language": 0.55,
"projection": 0.09,
"recovery phase": 0.33,
"threat": 0.15
}
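# Per-label sigmoid cutoffs: a pattern counts as active when its score meets
# or exceeds its threshold (applied in analyze_message).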
# ——— 3) Initialize EasyOCR reader ————————————————————————————————————————————
ocr_reader = easyocr.Reader(["en"], gpu=False)
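# CPU-only English OCR, used to pull text out of uploaded screenshots.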
# ——— 4) Emotional-Tone Tagging —————————————————————————————————————————————
def get_emotional_tone_tag(emotion_profile, patterns, text_lower):
    """Map blended emotion scores, lexicon counts, and active abuse patterns
    to a single tone tag. Rules are checked in order; first match wins, else None."""
sadness = emotion_profile.get("sadness", 0)
joy = emotion_profile.get("joy", 0)
neutral = emotion_profile.get("neutral", 0)
disgust = emotion_profile.get("disgust", 0)
anger = emotion_profile.get("anger", 0)
fear = emotion_profile.get("fear", 0)
surprise = emotion_profile.get("surprise", 0)
# NRC-EmoLex counts
words = text_lower.split()
lex_counts = {
emo: sum(EMOLEX.get(w, {}).get(emo, 0) for w in words)
for emo in ["anger","joy","sadness","fear","disgust"]
}
# MPQA counts
    mpqa_counts = {"strongsubj":0,"weaksubj":0,"positive":0,"negative":0}
    for w in words:
        for entry in mpqa_lex.get(w, []):
            # MPQA priorpolarity can also be "neutral" or "both"; count only the keys we track
            if entry.get("type") in mpqa_counts:
                mpqa_counts[entry["type"]] += 1
            if entry.get("priorpolarity") in mpqa_counts:
                mpqa_counts[entry["priorpolarity"]] += 1
# 0. Support override
if lex_counts["joy"] > 0 and any(k in text_lower for k in ["support","hope","grace"]):
return "supportive"
    # 1. Performative Regret
    #    → only when we see one of those patterns, not just lexicon hits
    if sadness > 0.4 \
        and any(p in patterns for p in ["blame shifting","guilt tripping","recovery phase"]):
        return "performative regret"
# 2. Coercive Warmth
if (joy > 0.3 or sadness > 0.4) \
and (lex_counts["joy"] > 0 or lex_counts["sadness"] > 0) \
and any(p in patterns for p in ["control","gaslighting"]):
return "coercive warmth"
# 3. Cold Invalidation
if (neutral + disgust) > 0.5 \
and lex_counts["disgust"] > 0 \
and any(p in patterns for p in ["dismissiveness","projection","obscure language"]):
return "cold invalidation"
# 4. Genuine Vulnerability
if (sadness + fear) > 0.5 \
and lex_counts["sadness"] > 0 and lex_counts["fear"] > 0 \
and all(p == "recovery phase" for p in patterns):
return "genuine vulnerability"
# 5. Emotional Threat
if (anger + disgust) > 0.5 \
and (lex_counts["anger"] > 0 or lex_counts["disgust"] > 0) \
and any(p in patterns for p in ["control","threat","insults","dismissiveness"]):
return "emotional threat"
# 6. Weaponized Sadness
if sadness > 0.6 \
and lex_counts["sadness"] > 0 \
and any(p in patterns for p in ["guilt tripping","projection"]):
return "weaponized sadness"
# 7. Toxic Resignation
if neutral > 0.5 \
and any(p in patterns for p in ["dismissiveness","obscure language"]) \
and lex_counts["disgust"] == 0:
return "toxic resignation"
# 8. Indignant Reproach
if anger > 0.5 \
and lex_counts["anger"] > 0 \
and any(p in patterns for p in ["guilt tripping","contradictory statements"]):
return "indignant reproach"
# 9. Confrontational
if anger > 0.6 \
and lex_counts["anger"] > 0 \
and patterns:
return "confrontational"
# 10. Passive Aggression
if neutral > 0.6 \
and lex_counts["disgust"] > 0 \
and any(p in patterns for p in ["dismissiveness","projection"]):
return "passive aggression"
# 11. Sarcastic Mockery
if joy > 0.3 \
and lex_counts["joy"] > 0 \
and "insults" in patterns:
return "sarcastic mockery"
# 12. Menacing Threat
if fear > 0.3 \
and lex_counts["fear"] > 0 \
and "threat" in patterns:
return "menacing threat"
# 13. Pleading Concern
if sadness > 0.3 \
and lex_counts["sadness"] > 0 \
and any(k in text_lower for k in APOLOGY_KEYWORDS) \
and not patterns:
return "pleading concern"
# 14. Fear-mongering
if (fear + disgust) > 0.5 \
and lex_counts["fear"] > 0 \
and "projection" in patterns:
return "fear-mongering"
# 15. Disbelieving Accusation
if surprise > 0.3 \
and lex_counts["surprise"] > 0 \
and "blame shifting" in patterns:
return "disbelieving accusation"
# 16. Empathetic Solidarity
if joy > 0.2 and sadness > 0.2 \
and lex_counts["joy"] > 0 and lex_counts["sadness"] > 0 \
and not patterns:
return "empathetic solidarity"
# 17. Assertive Boundary
if anger > 0.4 \
and lex_counts["anger"] > 0 \
and "control" in patterns:
return "assertive boundary"
# 18. Stonewalling
if neutral > 0.7 \
and lex_counts["disgust"] == 0 \
and not patterns:
return "stonewalling"
return None
# ——— 5) Single-message analysis ———————————————————————————————————————————
def analyze_message(text):
    """Run emotion profiling, lexicon blending, and abuse-pattern tagging on one message."""
text_lower = text.lower()
emotion_profile = get_emotion_profile(text)
    # blend in NRC-EmoLex: normalize raw counts to [0, 1], then take the
    # element-wise max against the transformer scores below
    lex_counts = score_emolex(text_lower)
    max_lex = max(lex_counts.values()) or 1.0  # guard against division by zero
    lex_scores = {emo: cnt/max_lex for emo, cnt in lex_counts.items()}
for emo in emotion_profile:
emotion_profile[emo] = max(emotion_profile[emo], lex_scores.get(emo,0))
# abuse-patterns
toks = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
with torch.no_grad():
logits = model(**toks).logits.squeeze(0)
scores = torch.sigmoid(logits).cpu().numpy()
active_patterns = [lab for lab, sc in zip(LABELS, scores) if sc >= THRESHOLDS[lab]]
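    # Heuristic: explicit apology language forces the "recovery phase" pattern
    # even when the classifier's score falls below its threshold.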
if any(k in text_lower for k in APOLOGY_KEYWORDS) and "recovery phase" not in active_patterns:
active_patterns.append("recovery phase")
tone_tag = get_emotional_tone_tag(emotion_profile, active_patterns, text_lower)
return {
"emotion_profile": emotion_profile,
"active_patterns": active_patterns,
"tone_tag": tone_tag
}
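# Example (illustrative; exact scores vary across model versions):
#   analyze_message("I'm sorry you feel that way.")
#   -> {"emotion_profile": {...}, "active_patterns": [...], "tone_tag": ...}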
# ——— 6) Composite wrapper ———————————————————————————————————————————————
def analyze_composite(uploaded_file, *texts):
    """Analyze an optional uploaded file (text, or image via OCR) plus free-text messages."""
outputs = []
# file handling / OCR
if uploaded_file is not None:
        # Gradio may pass a file-like object or a plain filepath, depending on version
        try:
            raw = uploaded_file.read()
        except AttributeError:
            with open(uploaded_file, "rb") as f:
                raw = f.read()
name = uploaded_file.name.lower() if hasattr(uploaded_file,"name") else uploaded_file.lower()
if name.endswith((".png",".jpg",".jpeg",".bmp",".gif",".tiff")):
img = Image.open(io.BytesIO(raw))
arr = np.array(img.convert("RGB"))
content = "\n".join(ocr_reader.readtext(arr, detail=0))
else:
try:
content = raw.decode("utf-8")
except UnicodeDecodeError:
content = raw.decode("latin-1")
r = analyze_message(content)
outputs.append(
"── Uploaded File ──\n"
f"Emotion Profile : {r['emotion_profile']}\n"
f"Active Patterns : {r['active_patterns']}\n"
f"Emotional Tone : {r['tone_tag']}\n"
)
# free-text messages
for idx, txt in enumerate(texts, start=1):
if not txt:
continue
r = analyze_message(txt)
outputs.append(
f"── Message {idx} ──\n"
f"Emotion Profile : {r['emotion_profile']}\n"
f"Active Patterns : {r['active_patterns']}\n"
f"Emotional Tone : {r['tone_tag']}\n"
)
if not outputs:
return "Please enter at least one message."
return "\n".join(outputs)
# ——— 7) Gradio interface ———————————————————————————————————————————————
message_inputs = [gr.Textbox(label="Message")]
iface = gr.Interface(
fn=analyze_composite,
inputs=[gr.File(file_types=[".txt",".png",".jpg",".jpeg"], label="Upload text or image")] + message_inputs,
outputs=gr.Textbox(label="Analysis"),
title="Tether Analyzer (extended tone tags)",
description="Emotion profiling, pattern tags, and a wide set of nuanced tone categories—no abuse score or DARVO."
)
if __name__ == "__main__":
iface.launch() |