Update app.py
app.py CHANGED
@@ -40,7 +40,18 @@ with open(MPQA_PATH, encoding="utf-8") as f:
         line = line.strip()
         if not line or line.startswith("#"):
             continue
-
+
+        # build fields dict but skip any token without '='
+        fields = {}
+        for item in line.split():
+            if "=" not in item:
+                continue
+            key, val = item.split("=", 1)
+            fields[key] = val
+
+        # must have word1
+        if "word1" not in fields:
+            continue
         w = fields.pop("word1").lower()
         mpqa_lex.setdefault(w, []).append(fields)
 
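Several removed lines in this commit appear cut off mid-statement, and the added code supplies complete replacements; here, a fragile one-shot dict build becomes a loop that tolerates tokens without an "=" and skips entries lacking word1. A minimal standalone sketch of the same logic, assuming MPQA subjectivity-lexicon entries of the usual key=value form (the sample line and the parse_mpqa_line helper are illustrative, not part of app.py):

def parse_mpqa_line(line):
    """Parse one MPQA lexicon line into a fields dict, skipping bad tokens."""
    fields = {}
    for item in line.split():
        if "=" not in item:          # tolerate stray tokens without '='
            continue
        key, val = item.split("=", 1)
        fields[key] = val
    return fields if "word1" in fields else None

sample = "type=weaksubj len=1 word1=abandoned pos1=adj stemmed1=n priorpolarity=negative"
fields = parse_mpqa_line(sample)
print(fields["priorpolarity"])       # -> negative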
@@ -88,11 +99,6 @@ ocr_reader = easyocr.Reader(["en"], gpu=False)
 
 # ——— 4) Emotional-Tone Tagging —————————————————————————————————————————————
 def get_emotional_tone_tag(emotion_profile, patterns, text_lower):
-    """
-    Assigns one of 18 nuanced tone categories based on
-    model scores, NRC-EmoLex counts, MPQA counts, detected patterns, and text.
-    """
-    # unpack transformer scores
     sadness = emotion_profile.get("sadness", 0)
     joy = emotion_profile.get("joy", 0)
     neutral = emotion_profile.get("neutral", 0)
@@ -234,15 +240,15 @@ def analyze_message(text):
     text_lower = text.lower()
     emotion_profile = get_emotion_profile(text)
 
-    # blend in NRC-EmoLex
+    # blend in NRC-EmoLex
     lex_counts = score_emolex(text_lower)
     max_lex = max(lex_counts.values()) or 1.0
-    lex_scores = {emo: cnt/
+    lex_scores = {emo: cnt/max_lex for emo, cnt in lex_counts.items()}
     for emo in emotion_profile:
         emotion_profile[emo] = max(emotion_profile[emo], lex_scores.get(emo,0))
 
     # abuse-patterns
-    toks
+    toks = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
         logits = model(**toks).logits.squeeze(0)
         scores = torch.sigmoid(logits).cpu().numpy()
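The repaired lines complete the lexicon blend: NRC-EmoLex counts are normalized by the maximum count (the `or 1.0` guards against division by zero when nothing matches), then merged with the transformer's probabilities by element-wise max, so lexicon evidence can raise a score but never lower it. The abuse-pattern head is multi-label, hence torch.sigmoid per class rather than a softmax. A runnable sketch of the blend with made-up numbers:

emotion_profile = {"sadness": 0.62, "joy": 0.05, "anger": 0.10}   # model probabilities (illustrative)
lex_counts = {"sadness": 3, "anger": 1, "joy": 0}                 # EmoLex hit counts (illustrative)

max_lex = max(lex_counts.values()) or 1.0            # 3 here; falls back to 1.0 if all counts are 0
lex_scores = {emo: cnt / max_lex for emo, cnt in lex_counts.items()}

for emo in emotion_profile:                          # element-wise max blend
    emotion_profile[emo] = max(emotion_profile[emo], lex_scores.get(emo, 0))

print(emotion_profile)                               # {'sadness': 1.0, 'joy': 0.05, 'anger': 0.333...}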
@@ -261,7 +267,7 @@ def analyze_message(text):
 def analyze_composite(uploaded_file, *texts):
     outputs = []
 
-    # file
+    # file handling / OCR
     if uploaded_file is not None:
         try:
             raw = uploaded_file.read()
@@ -271,8 +277,8 @@ def analyze_composite(uploaded_file, *texts):
 
         name = uploaded_file.name.lower() if hasattr(uploaded_file,"name") else uploaded_file.lower()
         if name.endswith((".png",".jpg",".jpeg",".bmp",".gif",".tiff")):
-            img
-            arr
+            img = Image.open(io.BytesIO(raw))
+            arr = np.array(img.convert("RGB"))
             content = "\n".join(ocr_reader.readtext(arr, detail=0))
         else:
             try:
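The two repaired assignments are the glue of a standard bytes → PIL → NumPy → EasyOCR pipeline: decode the uploaded bytes with Pillow, force RGB so EasyOCR gets a 3-channel array, and call the reader with detail=0 so it returns plain strings. A self-contained sketch (the file path is illustrative):

import io

import numpy as np
import easyocr
from PIL import Image

ocr_reader = easyocr.Reader(["en"], gpu=False)            # model weights download on first use

with open("screenshot.png", "rb") as f:                   # illustrative path
    raw = f.read()

img = Image.open(io.BytesIO(raw))
arr = np.array(img.convert("RGB"))                        # H x W x 3 uint8 array
content = "\n".join(ocr_reader.readtext(arr, detail=0))   # detail=0 -> text strings only
print(content)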
@@ -288,7 +294,7 @@ def analyze_composite(uploaded_file, *texts):
             f"Emotional Tone : {r['tone_tag']}\n"
         )
 
-    #
+    # free-text messages
     for idx, txt in enumerate(texts, start=1):
         if not txt:
             continue
@@ -306,6 +312,7 @@ def analyze_composite(uploaded_file, *texts):
 
 # ——— 7) Gradio interface ———————————————————————————————————————————————
 message_inputs = [gr.Textbox(label="Message")]
+
 iface = gr.Interface(
     fn=analyze_composite,
     inputs=[gr.File(file_types=[".txt",".png",".jpg",".jpeg"], label="Upload text or image")] + message_inputs,
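For reference, the interface wiring reduced to a runnable sketch; the analysis body and the output component are placeholders, since the diff shows neither (the real analyze_composite and its outputs live elsewhere in app.py):

import gradio as gr

def analyze_composite(uploaded_file, message):
    # placeholder for the real analysis logic
    parts = []
    if uploaded_file is not None:
        # gr.File may hand back a file object or a path string, depending on Gradio version
        path = uploaded_file.name if hasattr(uploaded_file, "name") else uploaded_file
        parts.append(f"received file: {path}")
    if message:
        parts.append(f"received message: {message}")
    return "\n".join(parts) or "nothing to analyze"

message_inputs = [gr.Textbox(label="Message")]

iface = gr.Interface(
    fn=analyze_composite,
    inputs=[gr.File(file_types=[".txt", ".png", ".jpg", ".jpeg"],
                    label="Upload text or image")] + message_inputs,
    outputs=gr.Textbox(label="Analysis"),   # assumed; the diff does not show outputs
)

if __name__ == "__main__":
    iface.launch()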