import gradio as gr
import torch
from transformers import pipeline as hf_pipeline, AutoModelForSequenceClassification, AutoTokenizer
from PIL import Image
import io
import easyocr
import numpy as np
import pandas as pd

# ——— Load and preprocess NRC EmoLex ——————————————————————————————————
EMOLEX_PATH = "NRC-Emotion-Lexicon-Wordlevel-v0.92.txt"
emo_raw = pd.read_csv(
    EMOLEX_PATH,
    sep="\t",
    names=["word","emotion","flag"],
    comment="#",
    header=None
)
emo_df = (
    emo_raw
    .pivot(index="word", columns="emotion", values="flag")
    .fillna(0)
    .astype(int)
)
EMOLEX = emo_df.to_dict(orient="index")
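# EMOLEX maps each word to a 0/1 flag per lexicon emotion column.
# Illustrative shape (actual flags come from the lexicon file itself):
# EMOLEX["abandon"] -> {"anger": 0, "fear": 1, "negative": 1, "sadness": 1, ...}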

def score_emolex(text_lower):
    counts = {emo: 0 for emo in emo_df.columns}
    for tok in text_lower.split():
        if tok in EMOLEX:
            for emo, flag in EMOLEX[tok].items():
                counts[emo] += flag
    return counts
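
# Illustrative usage (actual counts depend on lexicon coverage):
# score_emolex("i feel so alone and afraid")
# -> {"fear": 1, "sadness": 1, "negative": 2, ..., "joy": 0}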

# ——— Load MPQA Subjectivity Lexicon —————————————————————————————————————————————
MPQA_PATH = "subjclueslen1-HLTEMNLP05.tff"
mpqa_lex = {}
with open(MPQA_PATH, encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line or line.startswith("#"):
            continue

        # build fields dict but skip any token without '='
        fields = {}
        for item in line.split():
            if "=" not in item:
                continue
            key, val = item.split("=", 1)
            fields[key] = val

        # must have word1
        if "word1" not in fields:
            continue
        w = fields.pop("word1").lower()
        mpqa_lex.setdefault(w, []).append(fields)
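
# Each .tff line follows the MPQA release format, e.g.:
#   type=weaksubj len=1 word1=abandoned pos1=adj stemmed1=n priorpolarity=negative
# so after parsing (word1 popped out as the dict key):
#   mpqa_lex["abandoned"] -> [{"type": "weaksubj", "len": "1", "pos1": "adj",
#                              "stemmed1": "n", "priorpolarity": "negative"}]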

# ——— 1) Emotion Pipeline ————————————————————————————————————————————————
emotion_pipeline = hf_pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=None,
    truncation=True
)
def get_emotion_profile(text):
    results = emotion_pipeline(text)
    if isinstance(results, list) and isinstance(results[0], list):
        results = results[0]
    return {r["label"].lower(): round(r["score"], 3) for r in results}

APOLOGY_KEYWORDS = ["sorry", "apolog", "forgive"]

# ——— 2) Abuse-Patterns Model ——————————————————————————————————————————————
model_name = "SamanthaStorm/tether-multilabel-v3"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

LABELS = [
    "blame shifting", "contradictory statements", "control", "dismissiveness",
    "gaslighting", "guilt tripping", "insults", "obscure language",
    "projection", "recovery phase", "threat"
]
THRESHOLDS = {
    "blame shifting": 0.28,
    "contradictory statements": 0.27,
    "control": 0.08,
    "dismissiveness": 0.32,
    "gaslighting": 0.27,
    "guilt tripping": 0.31,
    "insults": 0.10,
    "obscure language": 0.55,
    "projection": 0.09,
    "recovery phase": 0.33,
    "threat": 0.15
}
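
# A pattern counts as "active" when its sigmoid score meets or exceeds its
# per-label threshold (applied in analyze_message below).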

# ——— 3) Initialize EasyOCR reader ————————————————————————————————————————————
ocr_reader = easyocr.Reader(["en"], gpu=False)

# ——— 4) Emotional-Tone Tagging —————————————————————————————————————————————
def get_emotional_tone_tag(emotion_profile, patterns, text_lower):
    sadness  = emotion_profile.get("sadness",  0)
    joy      = emotion_profile.get("joy",      0)
    neutral  = emotion_profile.get("neutral",  0)
    disgust  = emotion_profile.get("disgust",  0)
    anger    = emotion_profile.get("anger",    0)
    fear     = emotion_profile.get("fear",     0)
    surprise = emotion_profile.get("surprise", 0)

    # NRC-EmoLex counts
    words = text_lower.split()
    lex_counts = {
        emo: sum(EMOLEX.get(w, {}).get(emo, 0) for w in words)
        # "surprise" included because rule 15 below reads lex_counts["surprise"]
        for emo in ["anger","joy","sadness","fear","disgust","surprise"]
    }

    # MPQA counts (skip values such as priorpolarity "neutral"/"both"
    # that have no bucket here and would otherwise raise KeyError)
    mpqa_counts = {"strongsubj":0,"weaksubj":0,"positive":0,"negative":0}
    for w in words:
        for entry in mpqa_lex.get(w, []):
            if entry.get("type") in mpqa_counts:
                mpqa_counts[entry["type"]] += 1
            if entry.get("priorpolarity") in mpqa_counts:
                mpqa_counts[entry["priorpolarity"]] += 1

    # 0. Support override
    if lex_counts["joy"] > 0 and any(k in text_lower for k in ["support","hope","grace"]):
        return "supportive"

    # 1. Performative Regret
    # → only when we see one of those patterns, not just lexicon hits
    if sadness > 0.4 \
       and any(p in patterns for p in ["blame shifting","guilt tripping","recovery phase"]):
        return "performative regret"

    # 2. Coercive Warmth
    if (joy > 0.3 or sadness > 0.4) \
       and (lex_counts["joy"] > 0 or lex_counts["sadness"] > 0) \
       and any(p in patterns for p in ["control","gaslighting"]):
        return "coercive warmth"

    # 3. Cold Invalidation
    if (neutral + disgust) > 0.5 \
       and lex_counts["disgust"] > 0 \
       and any(p in patterns for p in ["dismissiveness","projection","obscure language"]):
        return "cold invalidation"

    # 4. Genuine Vulnerability
    if (sadness + fear) > 0.5 \
       and lex_counts["sadness"] > 0 and lex_counts["fear"] > 0 \
       and all(p == "recovery phase" for p in patterns):
        return "genuine vulnerability"

    # 5. Emotional Threat
    if (anger + disgust) > 0.5 \
       and (lex_counts["anger"] > 0 or lex_counts["disgust"] > 0) \
       and any(p in patterns for p in ["control","threat","insults","dismissiveness"]):
        return "emotional threat"

    # 6. Weaponized Sadness
    if sadness > 0.6 \
       and lex_counts["sadness"] > 0 \
       and any(p in patterns for p in ["guilt tripping","projection"]):
        return "weaponized sadness"

    # 7. Toxic Resignation
    if neutral > 0.5 \
       and any(p in patterns for p in ["dismissiveness","obscure language"]) \
       and lex_counts["disgust"] == 0:
        return "toxic resignation"

    # 8. Indignant Reproach
    if anger > 0.5 \
       and lex_counts["anger"] > 0 \
       and any(p in patterns for p in ["guilt tripping","contradictory statements"]):
        return "indignant reproach"

    # 9. Confrontational
    if anger > 0.6 \
       and lex_counts["anger"] > 0 \
       and patterns:
        return "confrontational"

    # 10. Passive Aggression
    if neutral > 0.6 \
       and lex_counts["disgust"] > 0 \
       and any(p in patterns for p in ["dismissiveness","projection"]):
        return "passive aggression"

    # 11. Sarcastic Mockery
    if joy > 0.3 \
       and lex_counts["joy"] > 0 \
       and "insults" in patterns:
        return "sarcastic mockery"

    # 12. Menacing Threat
    if fear > 0.3 \
       and lex_counts["fear"] > 0 \
       and "threat" in patterns:
        return "menacing threat"

    # 13. Pleading Concern
    if sadness > 0.3 \
       and lex_counts["sadness"] > 0 \
       and any(k in text_lower for k in APOLOGY_KEYWORDS) \
       and not patterns:
        return "pleading concern"

    # 14. Fear-mongering
    if (fear + disgust) > 0.5 \
       and lex_counts["fear"] > 0 \
       and "projection" in patterns:
        return "fear-mongering"

    # 15. Disbelieving Accusation
    if surprise > 0.3 \
       and lex_counts["surprise"] > 0 \
       and "blame shifting" in patterns:
        return "disbelieving accusation"

    # 16. Empathetic Solidarity
    if joy > 0.2 and sadness > 0.2 \
       and lex_counts["joy"] > 0 and lex_counts["sadness"] > 0 \
       and not patterns:
        return "empathetic solidarity"

    # 17. Assertive Boundary
    if anger > 0.4 \
       and lex_counts["anger"] > 0 \
       and "control" in patterns:
        return "assertive boundary"

    # 18. Stonewalling
    if neutral > 0.7 \
       and lex_counts["disgust"] == 0 \
       and not patterns:
        return "stonewalling"

    return None
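
# Note: the tone rules above are evaluated top-down and the first match wins;
# None means no distinctive tone was detected for the message.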

# ——— 5) Single-message analysis ———————————————————————————————————————————
def analyze_message(text):
    text_lower = text.lower()
    emotion_profile = get_emotion_profile(text)

    # blend in NRC-EmoLex
    lex_counts = score_emolex(text_lower)
    max_lex    = max(lex_counts.values()) or 1.0
    lex_scores = {emo: cnt/max_lex for emo, cnt in lex_counts.items()}
    for emo in emotion_profile:
        emotion_profile[emo] = max(emotion_profile[emo], lex_scores.get(emo,0))

    # abuse-patterns
    toks   = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**toks).logits.squeeze(0)
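    # multilabel head: one independent sigmoid per label, thresholded per label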
    scores = torch.sigmoid(logits).cpu().numpy()
    active_patterns = [lab for lab, sc in zip(LABELS, scores) if sc >= THRESHOLDS[lab]]
    if any(k in text_lower for k in APOLOGY_KEYWORDS) and "recovery phase" not in active_patterns:
        active_patterns.append("recovery phase")

    tone_tag = get_emotional_tone_tag(emotion_profile, active_patterns, text_lower)
    return {
        "emotion_profile": emotion_profile,
        "active_patterns": active_patterns,
        "tone_tag": tone_tag
    }
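
# Illustrative return shape (actual values depend on the models and lexicons):
# analyze_message("I'm sorry, but you made me do it")
# -> {"emotion_profile": {"sadness": 0.7, ...},
#     "active_patterns": ["blame shifting", "recovery phase"],
#     "tone_tag": "performative regret"}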

# ——— 6) Composite wrapper ———————————————————————————————————————————————
def analyze_composite(uploaded_file, *texts):
    outputs = []

    # file handling / OCR
    if uploaded_file is not None:
        try:
            raw = uploaded_file.read()
        except AttributeError:
            # gr.File can hand back a filepath rather than a file-like object
            with open(uploaded_file, "rb") as f:
                raw = f.read()

        name = uploaded_file.name.lower() if hasattr(uploaded_file,"name") else uploaded_file.lower()
        if name.endswith((".png",".jpg",".jpeg",".bmp",".gif",".tiff")):
            img     = Image.open(io.BytesIO(raw))
            arr     = np.array(img.convert("RGB"))
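            # detail=0 makes EasyOCR return plain strings (no boxes/confidences)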
            content = "\n".join(ocr_reader.readtext(arr, detail=0))
        else:
            try:
                content = raw.decode("utf-8")
            except UnicodeDecodeError:
                content = raw.decode("latin-1")

        r = analyze_message(content)
        outputs.append(
            "── Uploaded File ──\n"
            f"Emotion Profile : {r['emotion_profile']}\n"
            f"Active Patterns : {r['active_patterns']}\n"
            f"Emotional Tone  : {r['tone_tag']}\n"
        )

    # free-text messages
    for idx, txt in enumerate(texts, start=1):
        if not txt:
            continue
        r = analyze_message(txt)
        outputs.append(
            f"── Message {idx} ──\n"
            f"Emotion Profile : {r['emotion_profile']}\n"
            f"Active Patterns : {r['active_patterns']}\n"
            f"Emotional Tone  : {r['tone_tag']}\n"
        )

    if not outputs:
        return "Please enter at least one message."
    return "\n".join(outputs)

# ——— 7) Gradio interface ———————————————————————————————————————————————
message_inputs = [gr.Textbox(label="Message")]

iface = gr.Interface(
    fn=analyze_composite,
    inputs=[gr.File(file_types=[".txt",".png",".jpg",".jpeg"], label="Upload text or image")] + message_inputs,
    outputs=gr.Textbox(label="Analysis"),
    title="Tether Analyzer (extended tone tags)",
    description="Emotion profiling, pattern tags, and a wide set of nuanced tone categories—no abuse score or DARVO."
)

if __name__ == "__main__":
    iface.launch()