SamanthaStorm committed on
Commit
a37b7df
·
verified ·
1 Parent(s): 7020ff2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -8
app.py CHANGED
@@ -7,14 +7,6 @@ import easyocr
7
  import numpy as np
8
  import pandas as pd
9
 
10
- mpqa = {}
11
- with open("subj_lexicon.tff") as f:
12
- for line in f:
13
- fields = dict(tok.split("=") for tok in line.strip().split())
14
- word = fields["word1"]
15
- strength = fields["type"] # “strongsubj” vs “weaksubj”
16
- polarity = fields["priorpolarity"] # “positive” or “negative”
17
- mpqa[word] = (strength, polarity)
18
 
19
  # ——— Load and preprocess NRC EmoLex ——————————————————————————————————
20
  # Make sure this filename matches exactly what you’ve uploaded
@@ -47,7 +39,47 @@ def score_emolex(text_lower):
47
  for emo, flag in EMOLEX[tok].items():
48
  counts[emo] += flag
49
  return counts
 
 
 
 
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  # ——— 1) Emotion Pipeline ————————————————————————————————————————————————
52
  emotion_pipeline = hf_pipeline(
53
  "text-classification",
 
7
  import numpy as np
8
  import pandas as pd
9
 
 
 
 
 
 
 
 
 
10
 
11
  # ——— Load and preprocess NRC EmoLex ——————————————————————————————————
12
  # Make sure this filename matches exactly what you’ve uploaded
 
39
  for emo, flag in EMOLEX[tok].items():
40
  counts[emo] += flag
41
  return counts
42
+ import re
43
+
44
+ # ——— Load MPQA Subjectivity Lexicon —————————————————————————————————————————————
45
+ MPQA_PATH = "subjclueslen1-HLTEMNLP05.tff"
46
 
47
+ # mpqa_lex[word] = list of feature‐dicts for that word
48
+ mpqa_lex = {}
49
+ with open(MPQA_PATH, encoding="utf-8") as f:
50
+ for line in f:
51
+ line = line.strip()
52
+ if not line or line.startswith("#"):
53
+ continue
54
+ # each line looks like: type=strongsubj len=1 word1=abandon pos1=verb stemmed1=y priorpolarity=negative
55
+ fields = dict(item.split("=",1) for item in line.split())
56
+ w = fields.pop("word1").lower()
57
+ mpqa_lex.setdefault(w, []).append(fields)
58
+
59
+ # e.g. mpqa_lex["abandon"] == [ {'type':'strongsubj','len':'1','pos1':'verb','stemmed1':'y','priorpolarity':'negative'} ]
60
+
61
+ # ——— In your get_emotional_tone_tag, just after you split words… ——————————————————————
62
+ words = text_lower.split()
63
+
64
+ # count MPQA hits
65
+ mpqa_counts = {
66
+ "strongsubj": 0,
67
+ "weaksubj": 0,
68
+ "positive": 0,
69
+ "negative": 0,
70
+ }
71
+ for w in words:
72
+ for entry in mpqa_lex.get(w, []):
73
+ mpqa_counts[ entry["type"] ] += 1
74
+ mpqa_counts[ entry["priorpolarity"] ] += 1
75
+
76
+ # now you can reference mpqa_counts["negative"], etc.
77
+ # for example, tweak your “Emotional Threat” rule to require at least one strong-subjectivity hit (polarity is not checked here):
78
+ if (anger + disgust) > 0.5 \
79
+ and (lex_counts["anger"] > 0 or lex_counts["disgust"] > 0) \
80
+ and mpqa_counts["strongsubj"] > 0 \
81
+ and any(p in patterns for p in ["control","threat","insults","dismissiveness"]):
82
+ return "emotional threat"
83
  # ——— 1) Emotion Pipeline ————————————————————————————————————————————————
84
  emotion_pipeline = hf_pipeline(
85
  "text-classification",