Update app.py
app.py
CHANGED
@@ -7,79 +7,43 @@ import easyocr
 import numpy as np
 import pandas as pd
 
-
 # ——— Load and preprocess NRC EmoLex ——————————————————————————————————
-# Make sure this filename matches exactly what you’ve uploaded
 EMOLEX_PATH = "NRC-Emotion-Lexicon-Wordlevel-v0.92.txt"
-
-# Load the raw triples
 emo_raw = pd.read_csv(
     EMOLEX_PATH,
     sep="\t",
     names=["word","emotion","flag"],
-    comment="#",
+    comment="#",
     header=None
 )
-
-# Pivot: word → { emotion: 0 or 1, … }
 emo_df = (
     emo_raw
     .pivot(index="word", columns="emotion", values="flag")
     .fillna(0)
     .astype(int)
 )
-
-# Final lookup dict: EMOLEX["happy"]["joy"] == 1
 EMOLEX = emo_df.to_dict(orient="index")
+
 def score_emolex(text_lower):
-    # count how many times each emotion appears in the lexicon
     counts = {emo: 0 for emo in emo_df.columns}
     for tok in text_lower.split():
         if tok in EMOLEX:
             for emo, flag in EMOLEX[tok].items():
                 counts[emo] += flag
     return counts
-import re
 
 # ——— Load MPQA Subjectivity Lexicon —————————————————————————————————————————————
 MPQA_PATH = "subjclueslen1-HLTEMNLP05.tff"
-
-# mpqa_lex[word] = list of feature‐dicts for that word
 mpqa_lex = {}
 with open(MPQA_PATH, encoding="utf-8") as f:
     for line in f:
         line = line.strip()
         if not line or line.startswith("#"):
             continue
-        # each line looks like: type=strongsubj len=1 word1=abandon pos1=verb stemmed1=y priorpolarity=negative
         fields = dict(item.split("=",1) for item in line.split())
         w = fields.pop("word1").lower()
         mpqa_lex.setdefault(w, []).append(fields)
 
-# e.g. mpqa_lex["abandon"] == [ {'type':'strongsubj','len':'1','pos1':'verb','stemmed1':'y','priorpolarity':'negative'} ]
-
-# ——— In your get_emotional_tone_tag, just after you split words… ——————————————————————
-words = text_lower.split()
-
-# count MPQA hits
-mpqa_counts = {
-    "strongsubj": 0,
-    "weaksubj": 0,
-    "positive": 0,
-    "negative": 0,
-}
-for w in words:
-    for entry in mpqa_lex.get(w, []):
-        mpqa_counts[ entry["type"] ] += 1
-        mpqa_counts[ entry["priorpolarity"] ] += 1
-
-# now you can reference mpqa_counts["negative"], etc.
-# for example, tweak your “Emotional Threat” rule to require at least one strong negative subj:
-if (anger + disgust) > 0.5 \
-   and (lex_counts["anger"] > 0 or lex_counts["disgust"] > 0) \
-   and mpqa_counts["strongsubj"] > 0 \
-   and any(p in patterns for p in ["control","threat","insults","dismissiveness"]):
-    return "emotional threat"
 # ——— 1) Emotion Pipeline ————————————————————————————————————————————————
 emotion_pipeline = hf_pipeline(
     "text-classification",
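
Note: the EmoLex block above pivots the tab-separated (word, emotion, flag) triples into one row per word, so EMOLEX becomes a plain dict keyed by word and score_emolex() simply tallies flags for each token. A minimal sketch of the same idea, using a made-up three-row lexicon instead of the real NRC file:

import pandas as pd

# Toy stand-in for the NRC triples (word <tab> emotion <tab> flag).
toy_raw = pd.DataFrame(
    [("happy", "joy", 1), ("happy", "anger", 0), ("grim", "anger", 1)],
    columns=["word", "emotion", "flag"],
)
toy_df = (
    toy_raw
    .pivot(index="word", columns="emotion", values="flag")
    .fillna(0)
    .astype(int)
)
TOY_LEX = toy_df.to_dict(orient="index")
# TOY_LEX["happy"] == {"anger": 0, "joy": 1}

def toy_score(text_lower):
    counts = {emo: 0 for emo in toy_df.columns}
    for tok in text_lower.split():
        if tok in TOY_LEX:
            for emo, flag in TOY_LEX[tok].items():
                counts[emo] += flag
    return counts

print(toy_score("a happy but grim note"))  # {'anger': 1, 'joy': 1}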
@@ -87,14 +51,12 @@ emotion_pipeline = hf_pipeline(
     top_k=None,
     truncation=True
 )
-
 def get_emotion_profile(text):
     results = emotion_pipeline(text)
     if isinstance(results, list) and isinstance(results[0], list):
         results = results[0]
     return {r["label"].lower(): round(r["score"], 3) for r in results}
 
-# apology keywords for pleading concern
 APOLOGY_KEYWORDS = ["sorry", "apolog", "forgive"]
 
 # ——— 2) Abuse-Patterns Model ——————————————————————————————————————————————
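
Note: with top_k=None the text-classification pipeline returns a score for every label, and depending on the transformers version a single-string input may come back wrapped in an extra list; the isinstance check above unwraps that before lower-casing and rounding. A small sketch of just that flattening step, with a mocked pipeline result so no model download is needed:

def flatten_profile(results):
    # same unwrap-then-normalize logic as get_emotion_profile()
    if isinstance(results, list) and isinstance(results[0], list):
        results = results[0]
    return {r["label"].lower(): round(r["score"], 3) for r in results}

mocked = [[{"label": "JOY", "score": 0.91234}, {"label": "SADNESS", "score": 0.03111}]]
print(flatten_profile(mocked))  # {'joy': 0.912, 'sadness': 0.031}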
@@ -107,7 +69,6 @@ LABELS = [
     "gaslighting", "guilt tripping", "insults", "obscure language",
     "projection", "recovery phase", "threat"
 ]
-
 THRESHOLDS = {
     "blame shifting": 0.28,
     "contradictory statements": 0.27,
@@ -125,14 +86,13 @@ THRESHOLDS = {
 # ——— 3) Initialize EasyOCR reader ————————————————————————————————————————————
 ocr_reader = easyocr.Reader(["en"], gpu=False)
 
-
 # ——— 4) Emotional-Tone Tagging —————————————————————————————————————————————
 def get_emotional_tone_tag(emotion_profile, patterns, text_lower):
     """
     Assigns one of 18 nuanced tone categories based on
-    model scores, NRC-EmoLex counts, detected patterns, and text.
+    model scores, NRC-EmoLex counts, MPQA counts, detected patterns, and text.
     """
-    # unpack
+    # unpack transformer scores
     sadness = emotion_profile.get("sadness", 0)
     joy = emotion_profile.get("joy", 0)
     neutral = emotion_profile.get("neutral", 0)
@@ -141,13 +101,20 @@ def get_emotional_tone_tag(emotion_profile, patterns, text_lower):
     fear = emotion_profile.get("fear", 0)
     surprise = emotion_profile.get("surprise", 0)
 
-    #
+    # NRC-EmoLex counts
     words = text_lower.split()
     lex_counts = {
         emo: sum(EMOLEX.get(w, {}).get(emo, 0) for w in words)
         for emo in ["anger","joy","sadness","fear","disgust"]
     }
 
+    # MPQA counts
+    mpqa_counts = {"strongsubj":0,"weaksubj":0,"positive":0,"negative":0}
+    for w in words:
+        for entry in mpqa_lex.get(w, []):
+            mpqa_counts[entry["type"]] += 1
+            mpqa_counts[entry["priorpolarity"]] += 1
+
     # 0. Support override
     if lex_counts["joy"] > 0 and any(k in text_lower for k in ["support","hope","grace"]):
         return "supportive"
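
Note: each line of the MPQA .tff file is a run of key=value fields (e.g. type=strongsubj len=1 word1=abandon pos1=verb stemmed1=y priorpolarity=negative), which is why dict(item.split("=",1) ...) parses it directly. A self-contained sketch of the parse-and-tally logic used above, run on one hard-coded line instead of the real file; be aware the real lexicon also carries priorpolarity values such as "neutral", which the fixed four-key mpqa_counts dict added above would not accept, so a guard there may be worth considering:

sample = "type=strongsubj len=1 word1=abandon pos1=verb stemmed1=y priorpolarity=negative"

fields = dict(item.split("=", 1) for item in sample.split())
word = fields.pop("word1").lower()
toy_mpqa = {word: [fields]}          # mirrors mpqa_lex[word] -> list of feature dicts

mpqa_counts = {"strongsubj": 0, "weaksubj": 0, "positive": 0, "negative": 0}
for w in "they abandon us".split():
    for entry in toy_mpqa.get(w, []):
        mpqa_counts[entry["type"]] += 1
        mpqa_counts[entry["priorpolarity"]] += 1

print(mpqa_counts)  # {'strongsubj': 1, 'weaksubj': 0, 'positive': 0, 'negative': 1}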
@@ -262,48 +229,51 @@ def get_emotional_tone_tag(emotion_profile, patterns, text_lower):
 
     return None
 
-# ——— 5) Single
+# ——— 5) Single-message analysis ———————————————————————————————————————————
 def analyze_message(text):
     text_lower = text.lower()
     emotion_profile = get_emotion_profile(text)
-    # 2a. get lexicon counts
-    lex_counts = score_emolex(text_lower)
-    max_lex = max(lex_counts.values()) or 1.0  # avoid div0
-
-    # 2b. normalize them to [0,1]
-    lex_scores = {emo: cnt / max_lex for emo, cnt in lex_counts.items()}
 
-    #
+    # blend in NRC-EmoLex scores
+    lex_counts = score_emolex(text_lower)
+    max_lex = max(lex_counts.values()) or 1.0
+    lex_scores = {emo: cnt/ max_lex for emo, cnt in lex_counts.items()}
     for emo in emotion_profile:
-        emotion_profile[emo] = max(emotion_profile[emo], lex_scores.get(emo,
+        emotion_profile[emo] = max(emotion_profile[emo], lex_scores.get(emo,0))
+
+    # abuse-patterns
     toks = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
     with torch.no_grad():
         logits = model(**toks).logits.squeeze(0)
     scores = torch.sigmoid(logits).cpu().numpy()
-    active_patterns = [
+    active_patterns = [lab for lab, sc in zip(LABELS, scores) if sc >= THRESHOLDS[lab]]
     if any(k in text_lower for k in APOLOGY_KEYWORDS) and "recovery phase" not in active_patterns:
         active_patterns.append("recovery phase")
+
     tone_tag = get_emotional_tone_tag(emotion_profile, active_patterns, text_lower)
-    return {
+    return {
+        "emotion_profile": emotion_profile,
+        "active_patterns": active_patterns,
+        "tone_tag": tone_tag
+    }
 
 # ——— 6) Composite wrapper ———————————————————————————————————————————————
 def analyze_composite(uploaded_file, *texts):
     outputs = []
+
+    # file OCR / text handling
     if uploaded_file is not None:
         try:
             raw = uploaded_file.read()
-        except
+        except:
             with open(uploaded_file, "rb") as f:
                 raw = f.read()
 
-        name = (
-
-        )
-        if name.endswith((".png",".jpg",".jpeg",".tiff",".bmp",".gif")):
+        name = uploaded_file.name.lower() if hasattr(uploaded_file,"name") else uploaded_file.lower()
+        if name.endswith((".png",".jpg",".jpeg",".bmp",".gif",".tiff")):
             img = Image.open(io.BytesIO(raw))
             arr = np.array(img.convert("RGB"))
-
-            content = "\n".join(texts_ocr)
+            content = "\n".join(ocr_reader.readtext(arr, detail=0))
         else:
             try:
                 content = raw.decode("utf-8")
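
Note: the rewritten analyze_message() does two things worth spelling out: it keeps, per emotion, the larger of the transformer probability and the normalized EmoLex score, and it converts the sigmoid outputs into pattern labels by comparing each score against its per-label threshold. A toy numeric walk-through with made-up scores and a made-up subset of labels and thresholds (only "blame shifting": 0.28 matches the real table above):

import numpy as np

demo_labels = ["blame shifting", "insults", "threat"]
demo_thresholds = {"blame shifting": 0.28, "insults": 0.30, "threat": 0.25}

scores = np.array([0.31, 0.10, 0.40])        # pretend sigmoid outputs for one message
active = [lab for lab, sc in zip(demo_labels, scores) if sc >= demo_thresholds[lab]]
print(active)                                # ['blame shifting', 'threat']

emotion_profile = {"anger": 0.12, "joy": 0.05}   # pretend transformer scores
lex_scores = {"anger": 1.0, "joy": 0.0}          # pretend normalized EmoLex counts
for emo in emotion_profile:
    emotion_profile[emo] = max(emotion_profile[emo], lex_scores.get(emo, 0))
print(emotion_profile)                       # {'anger': 1.0, 'joy': 0.05}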
@@ -317,6 +287,8 @@ def analyze_composite(uploaded_file, *texts):
             f"Active Patterns : {r['active_patterns']}\n"
             f"Emotional Tone : {r['tone_tag']}\n"
         )
+
+    # inline text messages
     for idx, txt in enumerate(texts, start=1):
         if not txt:
             continue
@@ -327,6 +299,7 @@ def analyze_composite(uploaded_file, *texts):
             f"Active Patterns : {r['active_patterns']}\n"
             f"Emotional Tone : {r['tone_tag']}\n"
         )
+
     if not outputs:
         return "Please enter at least one message."
     return "\n".join(outputs)
@@ -342,4 +315,4 @@ iface = gr.Interface(
 )
 
 if __name__ == "__main__":
-    iface.launch()
+    iface.launch()
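
Note: after this change the two entry points are analyze_message() (returns a dict) and analyze_composite() (returns the report string shown in the Gradio UI). A hypothetical usage sketch, assuming the models and lexicon files load as above; actual scores and tags depend on the checkpoints, so the calls are left as comments:

# result = analyze_message("i am so sorry, please forgive me")
# result["emotion_profile"]   # per-emotion scores, max of transformer and EmoLex signals
# result["active_patterns"]   # thresholded labels; "recovery phase" is appended on apology keywords
# result["tone_tag"]          # one of the 18 tone categories, or None
#
# analyze_composite(None, "i am so sorry, please forgive me")   # per-message report lines
# analyze_composite(None, "")                                   # -> "Please enter at least one message."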