Update app.py
app.py
CHANGED
@@ -7,79 +7,43 @@ import easyocr
 import numpy as np
 import pandas as pd
 
-
 # ——— Load and preprocess NRC EmoLex ——————————————————————————————————
-# Make sure this filename matches exactly what you’ve uploaded
 EMOLEX_PATH = "NRC-Emotion-Lexicon-Wordlevel-v0.92.txt"
-
-# Load the raw triples
 emo_raw = pd.read_csv(
     EMOLEX_PATH,
     sep="\t",
     names=["word","emotion","flag"],
-    comment="#",
+    comment="#",
     header=None
 )
-
-# Pivot: word → { emotion: 0 or 1, … }
 emo_df = (
     emo_raw
     .pivot(index="word", columns="emotion", values="flag")
     .fillna(0)
     .astype(int)
 )
-
-# Final lookup dict: EMOLEX["happy"]["joy"] == 1
 EMOLEX = emo_df.to_dict(orient="index")
+
 def score_emolex(text_lower):
-    # count how many times each emotion appears in the lexicon
     counts = {emo: 0 for emo in emo_df.columns}
     for tok in text_lower.split():
         if tok in EMOLEX:
             for emo, flag in EMOLEX[tok].items():
                 counts[emo] += flag
     return counts
-import re
 
 # ——— Load MPQA Subjectivity Lexicon —————————————————————————————————————————————
 MPQA_PATH = "subjclueslen1-HLTEMNLP05.tff"
-
-# mpqa_lex[word] = list of feature‐dicts for that word
 mpqa_lex = {}
 with open(MPQA_PATH, encoding="utf-8") as f:
     for line in f:
         line = line.strip()
         if not line or line.startswith("#"):
             continue
-        # each line looks like: type=strongsubj len=1 word1=abandon pos1=verb stemmed1=y priorpolarity=negative
         fields = dict(item.split("=",1) for item in line.split())
         w = fields.pop("word1").lower()
         mpqa_lex.setdefault(w, []).append(fields)
 
-# e.g. mpqa_lex["abandon"] == [ {'type':'strongsubj','len':'1','pos1':'verb','stemmed1':'y','priorpolarity':'negative'} ]
-
-# ——— In your get_emotional_tone_tag, just after you split words… ——————————————————————
-words = text_lower.split()
-
-# count MPQA hits
-mpqa_counts = {
-    "strongsubj": 0,
-    "weaksubj": 0,
-    "positive": 0,
-    "negative": 0,
-}
-for w in words:
-    for entry in mpqa_lex.get(w, []):
-        mpqa_counts[ entry["type"] ] += 1
-        mpqa_counts[ entry["priorpolarity"] ] += 1
-
-# now you can reference mpqa_counts["negative"], etc.
-# for example, tweak your “Emotional Threat” rule to require at least one strong negative subj:
-if (anger + disgust) > 0.5 \
-   and (lex_counts["anger"] > 0 or lex_counts["disgust"] > 0) \
-   and mpqa_counts["strongsubj"] > 0 \
-   and any(p in patterns for p in ["control","threat","insults","dismissiveness"]):
-    return "emotional threat"
 # ——— 1) Emotion Pipeline ————————————————————————————————————————————————
 emotion_pipeline = hf_pipeline(
     "text-classification",
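
Note: the EmoLex block above pivots the tab-separated (word, emotion, flag) triples into one row per word, so EMOLEX becomes a plain dict keyed by word and score_emolex() simply tallies flags for each token. A minimal sketch of the same idea, using a made-up three-row lexicon instead of the real NRC file:

import pandas as pd

# Toy stand-in for the NRC triples (word <tab> emotion <tab> flag).
toy_raw = pd.DataFrame(
    [("happy", "joy", 1), ("happy", "anger", 0), ("grim", "anger", 1)],
    columns=["word", "emotion", "flag"],
)
toy_df = (
    toy_raw
    .pivot(index="word", columns="emotion", values="flag")
    .fillna(0)
    .astype(int)
)
TOY_LEX = toy_df.to_dict(orient="index")
# TOY_LEX["happy"] == {"anger": 0, "joy": 1}

def toy_score(text_lower):
    counts = {emo: 0 for emo in toy_df.columns}
    for tok in text_lower.split():
        if tok in TOY_LEX:
            for emo, flag in TOY_LEX[tok].items():
                counts[emo] += flag
    return counts

print(toy_score("a happy but grim note"))  # {'anger': 1, 'joy': 1}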
@@ -87,14 +51,12 @@ emotion_pipeline = hf_pipeline(
     top_k=None,
     truncation=True
 )
-
 def get_emotion_profile(text):
     results = emotion_pipeline(text)
     if isinstance(results, list) and isinstance(results[0], list):
         results = results[0]
     return {r["label"].lower(): round(r["score"], 3) for r in results}
 
-# apology keywords for pleading concern
 APOLOGY_KEYWORDS = ["sorry", "apolog", "forgive"]
 
 # ——— 2) Abuse-Patterns Model ——————————————————————————————————————————————
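
Note: with top_k=None the text-classification pipeline returns a score for every label, and depending on the transformers version a single-string input may come back wrapped in an extra list; the isinstance check above unwraps that before lower-casing and rounding. A small sketch of just that flattening step, with a mocked pipeline result so no model download is needed:

def flatten_profile(results):
    # same unwrap-then-normalize logic as get_emotion_profile()
    if isinstance(results, list) and isinstance(results[0], list):
        results = results[0]
    return {r["label"].lower(): round(r["score"], 3) for r in results}

mocked = [[{"label": "JOY", "score": 0.91234}, {"label": "SADNESS", "score": 0.03111}]]
print(flatten_profile(mocked))  # {'joy': 0.912, 'sadness': 0.031}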
@@ -107,7 +69,6 @@ LABELS = [
     "gaslighting", "guilt tripping", "insults", "obscure language",
     "projection", "recovery phase", "threat"
 ]
-
 THRESHOLDS = {
     "blame shifting": 0.28,
     "contradictory statements": 0.27,
@@ -125,14 +86,13 @@ THRESHOLDS = {
 # ——— 3) Initialize EasyOCR reader ————————————————————————————————————————————
 ocr_reader = easyocr.Reader(["en"], gpu=False)
 
-
 # ——— 4) Emotional-Tone Tagging —————————————————————————————————————————————
 def get_emotional_tone_tag(emotion_profile, patterns, text_lower):
     """
     Assigns one of 18 nuanced tone categories based on
-    model scores, NRC-EmoLex counts, detected patterns, and text.
+    model scores, NRC-EmoLex counts, MPQA counts, detected patterns, and text.
     """
-    # unpack
+    # unpack transformer scores
     sadness = emotion_profile.get("sadness", 0)
     joy = emotion_profile.get("joy", 0)
     neutral = emotion_profile.get("neutral", 0)
@@ -141,13 +101,20 @@ def get_emotional_tone_tag(emotion_profile, patterns, text_lower):
     fear = emotion_profile.get("fear", 0)
     surprise = emotion_profile.get("surprise", 0)
 
-    #
+    # NRC-EmoLex counts
     words = text_lower.split()
     lex_counts = {
         emo: sum(EMOLEX.get(w, {}).get(emo, 0) for w in words)
         for emo in ["anger","joy","sadness","fear","disgust"]
     }
 
+    # MPQA counts
+    mpqa_counts = {"strongsubj":0,"weaksubj":0,"positive":0,"negative":0}
+    for w in words:
+        for entry in mpqa_lex.get(w, []):
+            mpqa_counts[entry["type"]] += 1
+            mpqa_counts[entry["priorpolarity"]] += 1
+
     # 0. Support override
     if lex_counts["joy"] > 0 and any(k in text_lower for k in ["support","hope","grace"]):
         return "supportive"
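
Note: each line of the MPQA .tff file is a run of key=value fields (e.g. type=strongsubj len=1 word1=abandon pos1=verb stemmed1=y priorpolarity=negative), which is why dict(item.split("=",1) ...) parses it directly. A self-contained sketch of the parse-and-tally logic used above, run on one hard-coded line instead of the real file; be aware the real lexicon also carries priorpolarity values such as "neutral", which the fixed four-key mpqa_counts dict added above would not accept, so a guard there may be worth considering:

sample = "type=strongsubj len=1 word1=abandon pos1=verb stemmed1=y priorpolarity=negative"

fields = dict(item.split("=", 1) for item in sample.split())
word = fields.pop("word1").lower()
toy_mpqa = {word: [fields]}          # mirrors mpqa_lex[word] -> list of feature dicts

mpqa_counts = {"strongsubj": 0, "weaksubj": 0, "positive": 0, "negative": 0}
for w in "they abandon us".split():
    for entry in toy_mpqa.get(w, []):
        mpqa_counts[entry["type"]] += 1
        mpqa_counts[entry["priorpolarity"]] += 1

print(mpqa_counts)  # {'strongsubj': 1, 'weaksubj': 0, 'positive': 0, 'negative': 1}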
@@ -262,48 +229,51 @@ def get_emotional_tone_tag(emotion_profile, patterns, text_lower):
 
     return None
 
-# ——— 5) Single
+# ——— 5) Single-message analysis ———————————————————————————————————————————
 def analyze_message(text):
     text_lower = text.lower()
     emotion_profile = get_emotion_profile(text)
-    # 2a. get lexicon counts
-    lex_counts = score_emolex(text_lower)
-    max_lex = max(lex_counts.values()) or 1.0  # avoid div0
-
-    # 2b. normalize them to [0,1]
-    lex_scores = {emo: cnt / max_lex for emo, cnt in lex_counts.items()}
 
-    #
+    # blend in NRC-EmoLex scores
+    lex_counts = score_emolex(text_lower)
+    max_lex = max(lex_counts.values()) or 1.0
+    lex_scores = {emo: cnt/ max_lex for emo, cnt in lex_counts.items()}
     for emo in emotion_profile:
-        emotion_profile[emo] = max(emotion_profile[emo], lex_scores.get(emo,
+        emotion_profile[emo] = max(emotion_profile[emo], lex_scores.get(emo,0))
+
+    # abuse-patterns
     toks = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
     with torch.no_grad():
         logits = model(**toks).logits.squeeze(0)
     scores = torch.sigmoid(logits).cpu().numpy()
-    active_patterns = [
+    active_patterns = [lab for lab, sc in zip(LABELS, scores) if sc >= THRESHOLDS[lab]]
     if any(k in text_lower for k in APOLOGY_KEYWORDS) and "recovery phase" not in active_patterns:
         active_patterns.append("recovery phase")
+
     tone_tag = get_emotional_tone_tag(emotion_profile, active_patterns, text_lower)
-    return {
+    return {
+        "emotion_profile": emotion_profile,
+        "active_patterns": active_patterns,
+        "tone_tag": tone_tag
+    }
 
 # ——— 6) Composite wrapper ———————————————————————————————————————————————
 def analyze_composite(uploaded_file, *texts):
     outputs = []
+
+    # file OCR / text handling
     if uploaded_file is not None:
         try:
             raw = uploaded_file.read()
-        except
+        except:
             with open(uploaded_file, "rb") as f:
                 raw = f.read()
 
-        name = (
-
-        )
-        if name.endswith((".png",".jpg",".jpeg",".tiff",".bmp",".gif")):
+        name = uploaded_file.name.lower() if hasattr(uploaded_file,"name") else uploaded_file.lower()
+        if name.endswith((".png",".jpg",".jpeg",".bmp",".gif",".tiff")):
             img = Image.open(io.BytesIO(raw))
             arr = np.array(img.convert("RGB"))
-
-            content = "\n".join(texts_ocr)
+            content = "\n".join(ocr_reader.readtext(arr, detail=0))
         else:
             try:
                 content = raw.decode("utf-8")
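
Note: the rewritten analyze_message() does two things worth spelling out: it keeps, per emotion, the larger of the transformer probability and the normalized EmoLex score, and it converts the sigmoid outputs into pattern labels by comparing each score against its per-label threshold. A toy numeric walk-through with made-up scores and a made-up subset of labels and thresholds (only "blame shifting": 0.28 matches the real table above):

import numpy as np

demo_labels = ["blame shifting", "insults", "threat"]
demo_thresholds = {"blame shifting": 0.28, "insults": 0.30, "threat": 0.25}

scores = np.array([0.31, 0.10, 0.40])        # pretend sigmoid outputs for one message
active = [lab for lab, sc in zip(demo_labels, scores) if sc >= demo_thresholds[lab]]
print(active)                                # ['blame shifting', 'threat']

emotion_profile = {"anger": 0.12, "joy": 0.05}   # pretend transformer scores
lex_scores = {"anger": 1.0, "joy": 0.0}          # pretend normalized EmoLex counts
for emo in emotion_profile:
    emotion_profile[emo] = max(emotion_profile[emo], lex_scores.get(emo, 0))
print(emotion_profile)                       # {'anger': 1.0, 'joy': 0.05}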
@@ -317,6 +287,8 @@ def analyze_composite(uploaded_file, *texts):
             f"Active Patterns : {r['active_patterns']}\n"
             f"Emotional Tone : {r['tone_tag']}\n"
         )
+
+    # inline text messages
     for idx, txt in enumerate(texts, start=1):
         if not txt:
             continue
@@ -327,6 +299,7 @@ def analyze_composite(uploaded_file, *texts):
             f"Active Patterns : {r['active_patterns']}\n"
             f"Emotional Tone : {r['tone_tag']}\n"
         )
+
     if not outputs:
         return "Please enter at least one message."
     return "\n".join(outputs)
@@ -342,4 +315,4 @@ iface = gr.Interface(
 )
 
 if __name__ == "__main__":
-    iface.launch()
+    iface.launch()
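
Note: after this change the two entry points are analyze_message() (returns a dict) and analyze_composite() (returns the report string shown in the Gradio UI). A hypothetical usage sketch, assuming the models and lexicon files load as above; actual scores and tags depend on the checkpoints, so the calls are left as comments:

# result = analyze_message("i am so sorry, please forgive me")
# result["emotion_profile"]   # per-emotion scores, max of transformer and EmoLex signals
# result["active_patterns"]   # thresholded labels; "recovery phase" is appended on apology keywords
# result["tone_tag"]          # one of the 18 tone categories, or None
#
# analyze_composite(None, "i am so sorry, please forgive me")   # per-message report lines
# analyze_composite(None, "")                                   # -> "Please enter at least one message."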