File size: 10,090 Bytes
239a968
70ce6b1
 
cd900c5
 
384efe9
 
dd699d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fe6b66c
70ce6b1
fe6b66c
 
 
 
 
 
 
 
70ce6b1
 
 
 
 
4afc141
 
70ce6b1
4afc141
70ce6b1
4afc141
70ce6b1
 
 
 
 
 
 
 
 
4afc141
 
 
 
 
 
 
 
 
 
 
70ce6b1
 
384efe9
 
 
 
9d64e69
 
4afc141
9d64e69
384efe9
9d64e69
 
 
 
 
 
 
 
384efe9
 
3cc85b8
384efe9
 
 
c96a489
 
 
384efe9
c96a489
 
 
384efe9
c96a489
 
 
384efe9
c96a489
 
 
384efe9
c96a489
 
 
384efe9
c96a489
 
 
384efe9
c96a489
9d64e69
3624f82
384efe9
3624f82
9d64e69
 
 
 
 
 
384efe9
3624f82
9d64e69
 
3624f82
 
93ddbae
9d64e69
 
 
 
 
384efe9
9d64e69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93ddbae
 
384efe9
834f0ff
4afc141
d14c860
 
dd699d2
 
 
 
 
 
 
 
 
70ce6b1
 
 
 
384efe9
4afc141
 
 
384efe9
70ce6b1
384efe9
93ddbae
70ce6b1
93ddbae
5ebe61a
4b0f5e3
 
 
 
 
 
3f2016a
76dedd8
3f2016a
4b0f5e3
3f2016a
 
 
4b0f5e3
 
 
 
 
 
 
 
 
 
 
 
 
70ce6b1
76dedd8
 
 
93ddbae
70ce6b1
cd900c5
 
 
76dedd8
 
 
4b0f5e3
3f2016a
 
 
fe6b66c
70ce6b1
384efe9
70ce6b1
4afc141
 
fe6b66c
 
70ce6b1
384efe9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
import gradio as gr
import torch
from transformers import pipeline as hf_pipeline, AutoModelForSequenceClassification, AutoTokenizer
from PIL import Image
import io
import easyocr
import numpy as np
import pandas as pd

# ——— Load and preprocess NRC EmoLex ——————————————————————————————————
# The filename must match the lexicon file shipped alongside this script.
EMOLEX_PATH = "NRC-Emotion-Lexicon-Wordlevel-v0.92.txt"

# The raw lexicon is one (word, emotion, 0/1 flag) triple per line,
# tab-separated, with optional '#' comment lines.
_emolex_triples = pd.read_csv(
    EMOLEX_PATH,
    sep="\t",
    names=["word", "emotion", "flag"],
    comment="#",
    header=None,
)

# Reshape to one row per word and one 0/1 column per emotion.
emo_df = (
    _emolex_triples
    .pivot(index="word", columns="emotion", values="flag")
    .fillna(0)
    .astype(int)
)

# Fast lookup dict: EMOLEX["happy"]["joy"] == 1
EMOLEX = emo_df.to_dict(orient="index")
def score_emolex(text_lower, lexicon=None, emotions=None):
    """Count NRC EmoLex flags per emotion over the tokens of *text_lower*.

    Fix: tokens are stripped of surrounding punctuation before lookup, so
    e.g. "sorry," and "(happy)" still match their lexicon entries — plain
    str.split() previously left the punctuation attached and missed them.

    Args:
        text_lower: lower-cased input text.
        lexicon: word -> {emotion: 0/1} mapping; defaults to module EMOLEX.
        emotions: iterable of emotion names to tally; defaults to the
            columns of the module-level emo_df.

    Returns:
        dict mapping each emotion name to its total flag count.
    """
    if lexicon is None:
        lexicon = EMOLEX
    if emotions is None:
        emotions = emo_df.columns
    counts = {emo: 0 for emo in emotions}
    for raw_tok in text_lower.split():
        # Strip edge punctuation only; interior apostrophes/hyphens survive.
        tok = raw_tok.strip(".,!?;:\"'()[]{}")
        if tok in lexicon:
            for emo, flag in lexicon[tok].items():
                counts[emo] += flag
    return counts

# ——— 1) Emotion Pipeline ————————————————————————————————————————————————
# Transformer emotion classifier. top_k=None returns a score for every
# label rather than only the argmax; truncation guards against over-length
# inputs. The tone tagger below reads sadness/joy/neutral/disgust/anger/
# fear/surprise from its output.
emotion_pipeline = hf_pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=None,
    truncation=True
)

def get_emotion_profile(text):
    """Classify *text* with the emotion pipeline and return a
    {lower-cased label: score rounded to 3 places} mapping."""
    scored = emotion_pipeline(text)
    # With top_k=None the pipeline wraps a single input's label/score
    # dicts in an extra list; unwrap it.
    if isinstance(scored, list) and isinstance(scored[0], list):
        scored = scored[0]
    profile = {}
    for item in scored:
        profile[item["label"].lower()] = round(item["score"], 3)
    return profile

# Apology substrings ("apolog" covers apologize/apology/apologetic).
# Used by get_emotional_tone_tag for the "pleading concern" tone and by
# analyze_message to infer a "recovery phase" pattern.
APOLOGY_KEYWORDS = ["sorry", "apolog", "forgive"]

# ——— 2) Abuse-Patterns Model ——————————————————————————————————————————————
# Multi-label classifier: analyze_message applies a sigmoid per logit and
# compares each score against its own per-label threshold.
model_name = "SamanthaStorm/tether-multilabel-v3"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# use_fast=False loads the slow (pure-Python) tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

# Label names, zipped positionally with the model's logits in
# analyze_message — assumed to match the model's output index order.
LABELS = [
    "blame shifting", "contradictory statements", "control", "dismissiveness",
    "gaslighting", "guilt tripping", "insults", "obscure language",
    "projection", "recovery phase", "threat"
]

# Per-label decision thresholds for the sigmoid scores; a pattern is
# "active" when its probability meets or exceeds its threshold.
# Values look hand-tuned per label — TODO confirm against validation data.
THRESHOLDS = {
    "blame shifting": 0.28,
    "contradictory statements": 0.27,
    "control": 0.08,
    "dismissiveness": 0.32,
    "gaslighting": 0.27,
    "guilt tripping": 0.31,
    "insults": 0.10,
    "obscure language": 0.55,
    "projection": 0.09,
    "recovery phase": 0.33,
    "threat": 0.15
}

# ——— 3) Initialize EasyOCR reader ————————————————————————————————————————————
# English-only OCR on CPU; used by analyze_composite to pull text out of
# uploaded image files.
ocr_reader = easyocr.Reader(["en"], gpu=False)

# ——— 4) Emotional-Tone Tagging —————————————————————————————————————————————
def get_emotional_tone_tag(emotion_profile, patterns, text_lower):
    """Map emotion scores + detected abuse patterns + raw text to one
    nuanced tone label, or None when nothing matches.

    Rules are checked top-down; the first matching rule wins.
    """
    get = emotion_profile.get
    sadness = get("sadness", 0)
    joy = get("joy", 0)
    neutral = get("neutral", 0)
    disgust = get("disgust", 0)
    anger = get("anger", 0)
    fear = get("fear", 0)
    surprise = get("surprise", 0)

    def has_any(*names):
        # True if at least one of *names* was detected as a pattern.
        return any(p in patterns for p in names)

    # 0. Explicit supportive language overrides every other rule.
    if any(word in text_lower for word in ("support", "hope", "grace")):
        return "supportive"
    # 1. Performative Regret
    if sadness > 0.4 and has_any("blame shifting", "guilt tripping", "recovery phase"):
        return "performative regret"
    # 2. Coercive Warmth
    if (joy > 0.3 or sadness > 0.4) and has_any("control", "gaslighting"):
        return "coercive warmth"
    # 3. Cold Invalidation
    if neutral + disgust > 0.5 and has_any("dismissiveness", "projection", "obscure language"):
        return "cold invalidation"
    # 4. Genuine Vulnerability (note: all() is True for an empty pattern list)
    if sadness + fear > 0.5 and all(p == "recovery phase" for p in patterns):
        return "genuine vulnerability"
    # 5. Emotional Threat
    if anger + disgust > 0.5 and has_any("control", "threat", "insults", "dismissiveness"):
        return "emotional threat"
    # 6. Weaponized Sadness
    if sadness > 0.6 and has_any("guilt tripping", "projection"):
        return "weaponized sadness"
    # 7. Toxic Resignation
    if neutral > 0.5 and has_any("dismissiveness", "obscure language"):
        return "toxic resignation"
    # 8. Indignant Reproach
    if anger > 0.5 and has_any("guilt tripping", "contradictory statements"):
        return "indignant reproach"
    # 9. Confrontational
    if anger > 0.6 and patterns:
        return "confrontational"
    # 10. Passive Aggression
    if neutral > 0.6 and has_any("dismissiveness", "projection"):
        return "passive aggression"
    # 11. Sarcastic Mockery
    if joy > 0.3 and "insults" in patterns:
        return "sarcastic mockery"
    # 12. Menacing Threat
    if fear > 0.3 and "threat" in patterns:
        return "menacing threat"
    # 13. Pleading Concern (apology language, no detected patterns)
    if sadness > 0.3 and not patterns and any(k in text_lower for k in APOLOGY_KEYWORDS):
        return "pleading concern"
    # 14. Fear-mongering
    if fear + disgust > 0.5 and "projection" in patterns:
        return "fear-mongering"
    # 15. Disbelieving Accusation
    if surprise > 0.3 and "blame shifting" in patterns:
        return "disbelieving accusation"
    # 16. Empathetic Solidarity
    if joy > 0.2 and sadness > 0.2 and not patterns:
        return "empathetic solidarity"
    # 17. Assertive Boundary
    if anger > 0.4 and "control" in patterns:
        return "assertive boundary"
    # 18. Stonewalling
    if neutral > 0.7 and not patterns:
        return "stonewalling"

    return None

# ——— 5) Single message analysis ———————————————————————————————————————————
def analyze_message(text):
    """Analyze one message end-to-end.

    Returns a dict with:
        emotion_profile: transformer emotion scores blended (element-wise
            max) with max-normalized EmoLex counts;
        active_patterns: abuse-pattern labels whose sigmoid score meets
            its per-label threshold;
        tone_tag: the nuanced tone label (or None).
    """
    lowered = text.lower()

    # Blend transformer emotions with normalized lexicon counts.
    profile = get_emotion_profile(text)
    counts = score_emolex(lowered)
    denom = max(counts.values()) or 1.0  # avoid dividing by zero
    lex_scores = {emo: cnt / denom for emo, cnt in counts.items()}
    for emo in profile:
        profile[emo] = max(profile[emo], lex_scores.get(emo, 0))

    # Multi-label abuse-pattern detection.
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        raw_logits = model(**encoded).logits.squeeze(0)
    probs = torch.sigmoid(raw_logits).cpu().numpy()
    active = [lbl for lbl, p in zip(LABELS, probs) if p >= THRESHOLDS[lbl]]

    # Apology language implies a recovery phase even if the model missed it.
    if any(k in lowered for k in APOLOGY_KEYWORDS) and "recovery phase" not in active:
        active.append("recovery phase")

    return {
        "emotion_profile": profile,
        "active_patterns": active,
        "tone_tag": get_emotional_tone_tag(profile, active, lowered),
    }

# ——— 6) Composite wrapper ———————————————————————————————————————————————
def _read_upload(uploaded_file):
    """Return the text content of an upload, OCR-ing images.

    Accepts either a file-like object or a plain filesystem path (gradio
    may supply either). Image files go through EasyOCR; anything else is
    decoded as UTF-8 with a latin-1 fallback.
    """
    try:
        raw = uploaded_file.read()
    except Exception:
        # Not file-like — treat it as a path.
        with open(uploaded_file, "rb") as fh:
            raw = fh.read()

    name = (
        uploaded_file.name.lower() if hasattr(uploaded_file, "name") else uploaded_file.lower()
    )
    if name.endswith((".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif")):
        image = Image.open(io.BytesIO(raw))
        pixels = np.array(image.convert("RGB"))
        return "\n".join(ocr_reader.readtext(pixels, detail=0))
    try:
        return raw.decode("utf-8")
    except UnicodeDecodeError:
        return raw.decode("latin-1")


def analyze_composite(uploaded_file, *texts):
    """Analyze an optional uploaded file plus any number of text messages
    and return one formatted report string."""
    def render(title, result):
        # One report section per analyzed item.
        return (
            f"── {title} ──\n"
            f"Emotion Profile : {result['emotion_profile']}\n"
            f"Active Patterns : {result['active_patterns']}\n"
            f"Emotional Tone  : {result['tone_tag']}\n"
        )

    sections = []
    if uploaded_file is not None:
        sections.append(render("Uploaded File", analyze_message(_read_upload(uploaded_file))))
    for idx, message in enumerate(texts, start=1):
        if message:
            sections.append(render(f"Message {idx}", analyze_message(message)))

    if not sections:
        return "Please enter at least one message."
    return "\n".join(sections)

# ——— 7) Gradio interface ———————————————————————————————————————————————
# Inputs: one optional upload (plain text or screenshot) plus the free-text
# message box(es); output: a single formatted analysis report.
message_inputs = [gr.Textbox(label="Message")]
_upload = gr.File(
    file_types=[".txt", ".png", ".jpg", ".jpeg"],
    label="Upload text or image",
)
iface = gr.Interface(
    fn=analyze_composite,
    inputs=[_upload] + message_inputs,
    outputs=gr.Textbox(label="Analysis"),
    title="Tether Analyzer (extended tone tags)",
    description="Emotion profiling, pattern tags, and a wide set of nuanced tone categories—no abuse score or DARVO.",
)

if __name__ == "__main__":
    iface.launch()