SamanthaStorm committed on
Commit
4db4868
·
verified ·
1 Parent(s): 1b563a8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -64
app.py CHANGED
@@ -7,79 +7,43 @@ import easyocr
7
  import numpy as np
8
  import pandas as pd
9
 
10
-
11
# ——— Load and preprocess NRC EmoLex ——————————————————————————————————
EMOLEX_PATH = "NRC-Emotion-Lexicon-Wordlevel-v0.92.txt"

# Raw lexicon: one tab-separated (word, emotion, flag) triple per line.
_emolex_triples = pd.read_csv(
    EMOLEX_PATH,
    sep="\t",
    names=["word","emotion","flag"],
    comment="#",
    header=None
)

# Wide table: one row per word, one 0/1 column per emotion.
emo_df = (
    _emolex_triples
    .pivot(index="word", columns="emotion", values="flag")
    .fillna(0)
    .astype(int)
)

# Fast lookup dict, e.g. EMOLEX["happy"]["joy"] == 1.
EMOLEX = emo_df.to_dict(orient="index")
 
34
def score_emolex(text_lower):
    """Tally NRC-EmoLex hits per emotion for a lower-cased text.

    Returns a dict mapping every emotion column of ``emo_df`` to the
    number of flagged (token, emotion) pairs found in the text.
    """
    totals = dict.fromkeys(emo_df.columns, 0)
    for token in text_lower.split():
        entry = EMOLEX.get(token)
        if entry is None:
            continue
        for emotion, flag in entry.items():
            totals[emotion] += flag
    return totals
42
- import re
43
 
44
# ——— Load MPQA Subjectivity Lexicon —————————————————————————————————————————————
MPQA_PATH = "subjclueslen1-HLTEMNLP05.tff"

# mpqa_lex[word] -> list of feature dicts, e.g.
#   {'type': 'strongsubj', 'len': '1', 'pos1': 'verb',
#    'stemmed1': 'y', 'priorpolarity': 'negative'}
mpqa_lex = {}
with open(MPQA_PATH, encoding="utf-8") as f:
    for raw_line in f:
        raw_line = raw_line.strip()
        # Skip blank lines and comments.
        if not raw_line or raw_line.startswith("#"):
            continue
        # Each line: type=strongsubj len=1 word1=abandon pos1=verb stemmed1=y priorpolarity=negative
        features = dict(part.split("=", 1) for part in raw_line.split())
        word = features.pop("word1").lower()
        mpqa_lex.setdefault(word, []).append(features)
58
 
59
# ——— MPQA scoring helper ————————————————————————————————————————————————
# NOTE(review): the original code here executed at module import time but
# referenced names (text_lower, lex_counts, anger, disgust, patterns) that
# only exist inside get_emotional_tone_tag, so importing the module raised
# NameError. The counting logic is preserved as a callable helper instead.
def score_mpqa(words, lexicon=None):
    """Count MPQA subjectivity/polarity hits for an iterable of tokens.

    Args:
        words: iterable of lower-cased tokens.
        lexicon: mapping word -> list of MPQA feature dicts (each with
            "type" and "priorpolarity" keys); defaults to the module-level
            ``mpqa_lex``.

    Returns:
        dict with hit counts for "strongsubj", "weaksubj", "positive",
        and "negative".
    """
    if lexicon is None:
        lexicon = mpqa_lex
    counts = {
        "strongsubj": 0,
        "weaksubj": 0,
        "positive": 0,
        "negative": 0,
    }
    for w in words:
        for entry in lexicon.get(w, []):
            counts[entry["type"]] += 1
            counts[entry["priorpolarity"]] += 1
    return counts
83
  # ——— 1) Emotion Pipeline ————————————————————————————————————————————————
84
  emotion_pipeline = hf_pipeline(
85
  "text-classification",
@@ -87,14 +51,12 @@ emotion_pipeline = hf_pipeline(
87
  top_k=None,
88
  truncation=True
89
  )
90
-
91
  def get_emotion_profile(text):
92
  results = emotion_pipeline(text)
93
  if isinstance(results, list) and isinstance(results[0], list):
94
  results = results[0]
95
  return {r["label"].lower(): round(r["score"], 3) for r in results}
96
 
97
- # apology keywords for pleading concern
98
  APOLOGY_KEYWORDS = ["sorry", "apolog", "forgive"]
99
 
100
  # ——— 2) Abuse-Patterns Model ——————————————————————————————————————————————
@@ -107,7 +69,6 @@ LABELS = [
107
  "gaslighting", "guilt tripping", "insults", "obscure language",
108
  "projection", "recovery phase", "threat"
109
  ]
110
-
111
  THRESHOLDS = {
112
  "blame shifting": 0.28,
113
  "contradictory statements": 0.27,
@@ -125,14 +86,13 @@ THRESHOLDS = {
125
  # ——— 3) Initialize EasyOCR reader ————————————————————————————————————————————
126
  ocr_reader = easyocr.Reader(["en"], gpu=False)
127
 
128
-
129
  # ——— 4) Emotional-Tone Tagging —————————————————————————————————————————————
130
  def get_emotional_tone_tag(emotion_profile, patterns, text_lower):
131
  """
132
  Assigns one of 18 nuanced tone categories based on
133
- model scores, NRC-EmoLex counts, detected patterns, and text.
134
  """
135
- # unpack model emotion scores
136
  sadness = emotion_profile.get("sadness", 0)
137
  joy = emotion_profile.get("joy", 0)
138
  neutral = emotion_profile.get("neutral", 0)
@@ -141,13 +101,20 @@ def get_emotional_tone_tag(emotion_profile, patterns, text_lower):
141
  fear = emotion_profile.get("fear", 0)
142
  surprise = emotion_profile.get("surprise", 0)
143
 
144
- # count lexicon hits for the big five
145
  words = text_lower.split()
146
  lex_counts = {
147
  emo: sum(EMOLEX.get(w, {}).get(emo, 0) for w in words)
148
  for emo in ["anger","joy","sadness","fear","disgust"]
149
  }
150
 
 
 
 
 
 
 
 
151
  # 0. Support override
152
  if lex_counts["joy"] > 0 and any(k in text_lower for k in ["support","hope","grace"]):
153
  return "supportive"
@@ -262,48 +229,51 @@ def get_emotional_tone_tag(emotion_profile, patterns, text_lower):
262
 
263
  return None
264
 
265
- # ——— 5) Single message analysis ———————————————————————————————————————————
266
  def analyze_message(text):
267
  text_lower = text.lower()
268
  emotion_profile = get_emotion_profile(text)
269
- # 2a. get lexicon counts
270
- lex_counts = score_emolex(text_lower)
271
- max_lex = max(lex_counts.values()) or 1.0 # avoid div0
272
-
273
- # 2b. normalize them to [0,1]
274
- lex_scores = {emo: cnt / max_lex for emo, cnt in lex_counts.items()}
275
 
276
- # 2c. blend: take the max of transformer & lexicon
 
 
 
277
  for emo in emotion_profile:
278
- emotion_profile[emo] = max(emotion_profile[emo], lex_scores.get(emo, 0))
 
 
279
  toks = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
280
  with torch.no_grad():
281
  logits = model(**toks).logits.squeeze(0)
282
  scores = torch.sigmoid(logits).cpu().numpy()
283
- active_patterns = [label for label, prob in zip(LABELS, scores) if prob >= THRESHOLDS[label]]
284
  if any(k in text_lower for k in APOLOGY_KEYWORDS) and "recovery phase" not in active_patterns:
285
  active_patterns.append("recovery phase")
 
286
  tone_tag = get_emotional_tone_tag(emotion_profile, active_patterns, text_lower)
287
- return {"emotion_profile": emotion_profile, "active_patterns": active_patterns, "tone_tag": tone_tag}
 
 
 
 
288
 
289
  # ——— 6) Composite wrapper ———————————————————————————————————————————————
290
  def analyze_composite(uploaded_file, *texts):
291
  outputs = []
 
 
292
  if uploaded_file is not None:
293
  try:
294
  raw = uploaded_file.read()
295
- except Exception:
296
  with open(uploaded_file, "rb") as f:
297
  raw = f.read()
298
 
299
- name = (
300
- uploaded_file.name.lower() if hasattr(uploaded_file, "name") else uploaded_file.lower()
301
- )
302
- if name.endswith((".png",".jpg",".jpeg",".tiff",".bmp",".gif")):
303
  img = Image.open(io.BytesIO(raw))
304
  arr = np.array(img.convert("RGB"))
305
- texts_ocr = ocr_reader.readtext(arr, detail=0)
306
- content = "\n".join(texts_ocr)
307
  else:
308
  try:
309
  content = raw.decode("utf-8")
@@ -317,6 +287,8 @@ def analyze_composite(uploaded_file, *texts):
317
  f"Active Patterns : {r['active_patterns']}\n"
318
  f"Emotional Tone : {r['tone_tag']}\n"
319
  )
 
 
320
  for idx, txt in enumerate(texts, start=1):
321
  if not txt:
322
  continue
@@ -327,6 +299,7 @@ def analyze_composite(uploaded_file, *texts):
327
  f"Active Patterns : {r['active_patterns']}\n"
328
  f"Emotional Tone : {r['tone_tag']}\n"
329
  )
 
330
  if not outputs:
331
  return "Please enter at least one message."
332
  return "\n".join(outputs)
@@ -342,4 +315,4 @@ iface = gr.Interface(
342
  )
343
 
344
  if __name__ == "__main__":
345
- iface.launch()
 
7
  import numpy as np
8
  import pandas as pd
9
 
 
10
  # ——— Load and preprocess NRC EmoLex ——————————————————————————————————
 
11
  EMOLEX_PATH = "NRC-Emotion-Lexicon-Wordlevel-v0.92.txt"
 
 
12
  emo_raw = pd.read_csv(
13
  EMOLEX_PATH,
14
  sep="\t",
15
  names=["word","emotion","flag"],
16
+ comment="#",
17
  header=None
18
  )
 
 
19
  emo_df = (
20
  emo_raw
21
  .pivot(index="word", columns="emotion", values="flag")
22
  .fillna(0)
23
  .astype(int)
24
  )
 
 
25
  EMOLEX = emo_df.to_dict(orient="index")
26
+
27
  def score_emolex(text_lower):
 
28
  counts = {emo: 0 for emo in emo_df.columns}
29
  for tok in text_lower.split():
30
  if tok in EMOLEX:
31
  for emo, flag in EMOLEX[tok].items():
32
  counts[emo] += flag
33
  return counts
 
34
 
35
  # ——— Load MPQA Subjectivity Lexicon —————————————————————————————————————————————
36
  MPQA_PATH = "subjclueslen1-HLTEMNLP05.tff"
 
 
37
  mpqa_lex = {}
38
  with open(MPQA_PATH, encoding="utf-8") as f:
39
  for line in f:
40
  line = line.strip()
41
  if not line or line.startswith("#"):
42
  continue
 
43
  fields = dict(item.split("=",1) for item in line.split())
44
  w = fields.pop("word1").lower()
45
  mpqa_lex.setdefault(w, []).append(fields)
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  # ——— 1) Emotion Pipeline ————————————————————————————————————————————————
48
  emotion_pipeline = hf_pipeline(
49
  "text-classification",
 
51
  top_k=None,
52
  truncation=True
53
  )
 
54
def get_emotion_profile(text):
    """Run the emotion classifier and return {label: rounded score}.

    The pipeline may wrap a single input's results in a nested list;
    unwrap before building the mapping. Labels are lower-cased and
    scores rounded to three decimals.
    """
    predictions = emotion_pipeline(text)
    if isinstance(predictions, list) and isinstance(predictions[0], list):
        predictions = predictions[0]
    return {p["label"].lower(): round(p["score"], 3) for p in predictions}
59
 
 
60
  APOLOGY_KEYWORDS = ["sorry", "apolog", "forgive"]
61
 
62
  # ——— 2) Abuse-Patterns Model ——————————————————————————————————————————————
 
69
  "gaslighting", "guilt tripping", "insults", "obscure language",
70
  "projection", "recovery phase", "threat"
71
  ]
 
72
  THRESHOLDS = {
73
  "blame shifting": 0.28,
74
  "contradictory statements": 0.27,
 
86
# ——— 3) Initialize EasyOCR reader ————————————————————————————————————————————
# CPU-only English reader, built once at import so every OCR call reuses
# the loaded model instead of re-initializing it.
ocr_reader = easyocr.Reader(["en"], gpu=False)
88
 
 
89
  # ——— 4) Emotional-Tone Tagging —————————————————————————————————————————————
90
  def get_emotional_tone_tag(emotion_profile, patterns, text_lower):
91
  """
92
  Assigns one of 18 nuanced tone categories based on
93
+ model scores, NRC-EmoLex counts, MPQA counts, detected patterns, and text.
94
  """
95
+ # unpack transformer scores
96
  sadness = emotion_profile.get("sadness", 0)
97
  joy = emotion_profile.get("joy", 0)
98
  neutral = emotion_profile.get("neutral", 0)
 
101
  fear = emotion_profile.get("fear", 0)
102
  surprise = emotion_profile.get("surprise", 0)
103
 
104
+ # NRC-EmoLex counts
105
  words = text_lower.split()
106
  lex_counts = {
107
  emo: sum(EMOLEX.get(w, {}).get(emo, 0) for w in words)
108
  for emo in ["anger","joy","sadness","fear","disgust"]
109
  }
110
 
111
+ # MPQA counts
112
+ mpqa_counts = {"strongsubj":0,"weaksubj":0,"positive":0,"negative":0}
113
+ for w in words:
114
+ for entry in mpqa_lex.get(w, []):
115
+ mpqa_counts[entry["type"]] += 1
116
+ mpqa_counts[entry["priorpolarity"]] += 1
117
+
118
  # 0. Support override
119
  if lex_counts["joy"] > 0 and any(k in text_lower for k in ["support","hope","grace"]):
120
  return "supportive"
 
229
 
230
  return None
231
 
232
# ——— 5) Single-message analysis ———————————————————————————————————————————
def analyze_message(text):
    """Analyze one message: blended emotion profile, abuse patterns, tone tag."""
    lowered = text.lower()
    profile = get_emotion_profile(text)

    # Blend in NRC-EmoLex: normalize lexicon counts to [0, 1] and keep,
    # per emotion, the larger of transformer score vs. lexicon score.
    counts = score_emolex(lowered)
    denominator = max(counts.values()) or 1.0  # guard against division by zero
    normalized = {emotion: n / denominator for emotion, n in counts.items()}
    for emotion in profile:
        blended = normalized.get(emotion, 0)
        if blended > profile[emotion]:
            profile[emotion] = blended

    # Abuse-pattern classifier: per-label sigmoid, thresholded per label.
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**encoded).logits.squeeze(0)
    probs = torch.sigmoid(logits).cpu().numpy()
    active_patterns = [
        label for label, prob in zip(LABELS, probs) if prob >= THRESHOLDS[label]
    ]
    # Apology keywords force "recovery phase" on when not already detected.
    if any(k in lowered for k in APOLOGY_KEYWORDS) and "recovery phase" not in active_patterns:
        active_patterns.append("recovery phase")

    tone_tag = get_emotional_tone_tag(profile, active_patterns, lowered)
    return {
        "emotion_profile": profile,
        "active_patterns": active_patterns,
        "tone_tag": tone_tag
    }
259
 
260
  # ——— 6) Composite wrapper ———————————————————————————————————————————————
261
  def analyze_composite(uploaded_file, *texts):
262
  outputs = []
263
+
264
+ # file OCR / text handling
265
  if uploaded_file is not None:
266
  try:
267
  raw = uploaded_file.read()
268
+ except:
269
  with open(uploaded_file, "rb") as f:
270
  raw = f.read()
271
 
272
+ name = uploaded_file.name.lower() if hasattr(uploaded_file,"name") else uploaded_file.lower()
273
+ if name.endswith((".png",".jpg",".jpeg",".bmp",".gif",".tiff")):
 
 
274
  img = Image.open(io.BytesIO(raw))
275
  arr = np.array(img.convert("RGB"))
276
+ content = "\n".join(ocr_reader.readtext(arr, detail=0))
 
277
  else:
278
  try:
279
  content = raw.decode("utf-8")
 
287
  f"Active Patterns : {r['active_patterns']}\n"
288
  f"Emotional Tone : {r['tone_tag']}\n"
289
  )
290
+
291
+ # inline text messages
292
  for idx, txt in enumerate(texts, start=1):
293
  if not txt:
294
  continue
 
299
  f"Active Patterns : {r['active_patterns']}\n"
300
  f"Emotional Tone : {r['tone_tag']}\n"
301
  )
302
+
303
  if not outputs:
304
  return "Please enter at least one message."
305
  return "\n".join(outputs)
 
315
  )
316
 
# Launch the Gradio interface only when executed as a script (not on import).
if __name__ == "__main__":
    iface.launch()