Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,39 @@ from PIL import Image
|
|
5 |
import io
|
6 |
import easyocr
|
7 |
import numpy as np
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
# ——— 1) Emotion Pipeline ————————————————————————————————————————————————
|
10 |
emotion_pipeline = hf_pipeline(
|
@@ -146,7 +179,17 @@ def get_emotional_tone_tag(emotion_profile, patterns, text_lower):
|
|
146 |
# ——— 5) Single message analysis ———————————————————————————————————————————
|
147 |
def analyze_message(text):
|
148 |
text_lower = text.lower()
|
149 |
-
emotion_profile = get_emotion_profile(text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
toks = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
|
151 |
with torch.no_grad():
|
152 |
logits = model(**toks).logits.squeeze(0)
|
|
|
5 |
import io
|
6 |
import easyocr
|
7 |
import numpy as np
|
8 |
+
import pandas as pd

# ——— Load and preprocess the NRC Emotion Lexicon (EmoLex) ————————————
# The filename must match the uploaded lexicon file exactly.
EMOLEX_PATH = "NRC-Emotion-Lexicon-Wordlevel-v0.92.txt"

# Read the raw (word, emotion, flag) triples from the tab-separated file.
emo_raw = pd.read_csv(
    EMOLEX_PATH,
    sep="\t",
    names=["word","emotion","flag"],
    comment="#",  # skip any commented lines
    header=None
)

# Reshape into a word-indexed table with one 0/1 column per emotion.
emo_df = emo_raw.pivot(index="word", columns="emotion", values="flag")
emo_df = emo_df.fillna(0).astype(int)

# Final lookup dict keyed by word, e.g. EMOLEX["happy"]["joy"] == 1
EMOLEX = emo_df.to_dict(orient="index")
33 |
+
def score_emolex(text_lower):
    """Count NRC-EmoLex emotion flags for the words of a message.

    Parameters
    ----------
    text_lower : str
        The message text, already lower-cased by the caller.

    Returns
    -------
    dict
        Maps every emotion column in the lexicon to the number of
        flagged word occurrences found in the text.
    """
    import re  # function-scope import keeps this change self-contained

    counts = {emo: 0 for emo in emo_df.columns}
    # BUG FIX: the previous str.split() tokenizer left punctuation attached
    # ("happy," != "happy"), so lexicon words adjacent to punctuation were
    # never counted. Extract alphabetic word runs (keeping apostrophes)
    # instead; text is already lower-case, matching the lexicon's keys.
    for tok in re.findall(r"[a-z']+", text_lower):
        if tok in EMOLEX:
            for emo, flag in EMOLEX[tok].items():
                counts[emo] += flag
    return counts
41 |
|
42 |
# ——— 1) Emotion Pipeline ————————————————————————————————————————————————
|
43 |
emotion_pipeline = hf_pipeline(
|
|
|
179 |
# ——— 5) Single message analysis ———————————————————————————————————————————
|
180 |
def analyze_message(text):
|
181 |
text_lower = text.lower()
|
182 |
+
emotion_profile = get_emotion_profile(text
|
183 |
+
# 2a. get lexicon counts
|
184 |
+
lex_counts = score_emolex(text_lower)
|
185 |
+
max_lex = max(lex_counts.values()) or 1.0 # avoid div0
|
186 |
+
|
187 |
+
# 2b. normalize them to [0,1]
|
188 |
+
lex_scores = {emo: cnt / max_lex for emo, cnt in lex_counts.items()}
|
189 |
+
|
190 |
+
# 2c. blend: take the max of transformer & lexicon
|
191 |
+
for emo in emotion_profile:
|
192 |
+
emotion_profile[emo] = max(emotion_profile[emo], lex_scores.get(emo, 0))
|
193 |
toks = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
|
194 |
with torch.no_grad():
|
195 |
logits = model(**toks).logits.squeeze(0)
|