Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,39 @@ from PIL import Image
|
|
5 |
import io
|
6 |
import easyocr
|
7 |
import numpy as np
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
# ——— 1) Emotion Pipeline ————————————————————————————————————————————————
|
10 |
emotion_pipeline = hf_pipeline(
|
@@ -146,7 +179,17 @@ def get_emotional_tone_tag(emotion_profile, patterns, text_lower):
|
|
146 |
# ——— 5) Single message analysis ———————————————————————————————————————————
|
147 |
def analyze_message(text):
|
148 |
text_lower = text.lower()
|
149 |
-
emotion_profile = get_emotion_profile(text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
toks = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
|
151 |
with torch.no_grad():
|
152 |
logits = model(**toks).logits.squeeze(0)
|
|
|
5 |
import io
|
6 |
import easyocr
|
7 |
import numpy as np
|
8 |
+
import pandas as pd

# ——— Load and preprocess the NRC Emotion Lexicon (EmoLex) ————————————
# The filename must match the uploaded lexicon file exactly.
EMOLEX_PATH = "NRC-Emotion-Lexicon-Wordlevel-v0.92.txt"

# Read the raw (word, emotion, flag) triples from the tab-separated file.
emo_raw = pd.read_csv(
    EMOLEX_PATH,
    sep="\t",
    names=["word","emotion","flag"],
    comment="#",  # skip any commented lines
    header=None
)

# Reshape into a word-indexed table with one 0/1 column per emotion.
emo_df = emo_raw.pivot(index="word", columns="emotion", values="flag")
emo_df = emo_df.fillna(0).astype(int)

# Final lookup dict keyed by word, e.g. EMOLEX["happy"]["joy"] == 1
EMOLEX = emo_df.to_dict(orient="index")
33 |
+
def score_emolex(text_lower):
    """Count NRC-EmoLex emotion flags for the words of a message.

    Parameters
    ----------
    text_lower : str
        The message text, already lower-cased by the caller.

    Returns
    -------
    dict
        Maps every emotion column in the lexicon to the number of
        flagged word occurrences found in the text.
    """
    import re  # function-scope import keeps this change self-contained

    counts = {emo: 0 for emo in emo_df.columns}
    # BUG FIX: the previous str.split() tokenizer left punctuation attached
    # ("happy," != "happy"), so lexicon words adjacent to punctuation were
    # never counted. Extract alphabetic word runs (keeping apostrophes)
    # instead; text is already lower-case, matching the lexicon's keys.
    for tok in re.findall(r"[a-z']+", text_lower):
        if tok in EMOLEX:
            for emo, flag in EMOLEX[tok].items():
                counts[emo] += flag
    return counts
41 |
|
42 |
# ——— 1) Emotion Pipeline ————————————————————————————————————————————————
|
43 |
emotion_pipeline = hf_pipeline(
|
|
|
179 |
# ——— 5) Single message analysis ———————————————————————————————————————————
|
180 |
def analyze_message(text):
|
181 |
text_lower = text.lower()
|
182 |
+
emotion_profile = get_emotion_profile(text
|
183 |
+
# 2a. get lexicon counts
|
184 |
+
lex_counts = score_emolex(text_lower)
|
185 |
+
max_lex = max(lex_counts.values()) or 1.0 # avoid div0
|
186 |
+
|
187 |
+
# 2b. normalize them to [0,1]
|
188 |
+
lex_scores = {emo: cnt / max_lex for emo, cnt in lex_counts.items()}
|
189 |
+
|
190 |
+
# 2c. blend: take the max of transformer & lexicon
|
191 |
+
for emo in emotion_profile:
|
192 |
+
emotion_profile[emo] = max(emotion_profile[emo], lex_scores.get(emo, 0))
|
193 |
toks = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
|
194 |
with torch.no_grad():
|
195 |
logits = model(**toks).logits.squeeze(0)
|