RobPruzan committed on
Commit f005347 · 1 Parent(s): b58b3e7

Optimizations and more interpretation

Files changed (1)
  1. app.py +38 -40
app.py CHANGED
@@ -66,6 +66,7 @@ def calculate_diversity(text):
     tokenized_text = word_tokenize(text)
 
     tokenized_text = list(map(lambda word: word.lower(), tokenized_text))
+    global sim_words
     sim_words = {}
     if len(tokenized_text) <= 1:
         return 1, "More Text Required"
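Why the commit adds global sim_words: the function assigns to sim_words, and the new get_sim_words callback introduced later in this diff reads it, so without the declaration the assignment would only create a function-local dict. A minimal sketch of the sharing pattern, assuming a module-level store (the explicit initialization and the toy fill loop are illustrative, not lines from the commit):

    sim_words = {}                      # module-level store read by the new callback

    def calculate_diversity(text):
        global sim_words                # rebind the shared name...
        sim_words = {}                  # ...instead of creating a local dict
        for idx, word in enumerate(text.split()):
            sim_words[idx] = []         # the app later fills these with similar words

    calculate_diversity("a toy sentence")
    print(sim_words)                    # {0: [], 1: [], 2: []}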
@@ -85,7 +86,7 @@ def calculate_diversity(text):
             if not comp.isalpha():
                 continue
             try:
-                if cosine_similarity(w2v[anc].reshape(1, -1), w2v[comp].reshape(1, -1)) > .7 or comp in wn_syns(anc):
+                if cosine_similarity(w2v[anc].reshape(1, -1), w2v[comp].reshape(1, -1)) > .75 or comp in wn_syns(anc):
                     vocab.append(comp)
             except KeyError:
                 continue
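The only change in this hunk is the similarity cutoff, raised from .7 to .75, which tightens what counts as a "similar" comparison word. A standalone sketch of the gate, assuming w2v behaves like a gensim keyed-vector lookup; wn_syns here is a hypothetical stand-in for the app's WordNet synonym helper:

    from sklearn.metrics.pairwise import cosine_similarity
    from nltk.corpus import wordnet as wn   # requires nltk's wordnet data

    def wn_syns(word):
        # Hypothetical stand-in: every lemma name across the word's synsets.
        return {lemma.name() for syn in wn.synsets(word) for lemma in syn.lemmas()}

    def is_similar(anc, comp, w2v, threshold=0.75):
        # cosine_similarity expects 2-D arrays, hence the reshape; a KeyError
        # means the word is missing from the embedding vocabulary.
        try:
            sim = cosine_similarity(w2v[anc].reshape(1, -1),
                                    w2v[comp].reshape(1, -1))[0][0]
        except KeyError:
            return False
        return sim > threshold or comp in wn_syns(anc)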
@@ -96,9 +97,6 @@ def calculate_diversity(text):
         if len(value) == 1:
             scores[key] = -1
             continue
-        # if len(value) == 2:
-        #     scores[key] = -1
-        #     continue
         t_sim = len(value)
         t_rep = (len(value)) - (len(set(value)))
 
@@ -108,24 +106,39 @@ def calculate_diversity(text):
 
     mean_score = 0
     total = 0
-
+
     for value in scores.values():
         if value == -1:
             continue
         mean_score += value
         total += 1
-    try:
-        return scores, {"Diversity Score": mean_score / total}
-    except ZeroDivisionError:
-        return scores, {"Dviersity Score": "Not Enough Data"}
+    words = word_tokenize(text)
 
+    interpret_values = [('', 0.0)]
+
+    for key, value in scores.items():
+        interpret_values.append((words[key], value))
 
-def get_scores(text):
-    return calculate_diversity(text)[0]
+    interpret_values.append(('', 0.0))
+    print(interpret_values)
+    int_vals = {'original': text, 'interpretation': interpret_values}
+    try:
+        return int_vals, {"Diversity Score": mean_score / total}
+    except ZeroDivisionError:
+        return int_vals, {"Dviersity Score": "Not Enough Data"}
 
 
-def get_mean_score(text):
-    return calculate_diversity(text)[1]
+def get_sim_words(text, word):
+    word = word.strip()
+    index = 0
+    text = word_tokenize(text)
+    print(sim_words)
+    for idx, i in enumerate(text):
+        if word == i:
+            index = idx
+            break
+    return ', '.join(sim_words[index])
 
 
 def dict_to_list(dictionary, max_size=10):
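calculate_diversity now builds the {'original': ..., 'interpretation': [(token, score), ...]} payload itself and returns it alongside the mean score, so a single call can feed both the heatmap and the label. The new get_sim_words reads the global sim_words populated by that call and falls back to token index 0 when the queried word is not found; a slightly more defensive variant (hypothetical, not part of the commit) might look like:

    from nltk.tokenize import word_tokenize

    sim_words = {}   # in the app this is the global filled by calculate_diversity()

    def get_sim_words_safe(text, word):
        # Hypothetical defensive variant of the commit's get_sim_words.
        word = word.strip().lower()
        if not sim_words:
            return "Run the diversity analysis first"
        for idx, token in enumerate(word_tokenize(text)):
            if token.lower() == word:
                matches = sim_words.get(idx, [])
                return ', '.join(matches) if matches else "No similar words stored"
        return "Word not found in the analysis text"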
@@ -314,16 +327,6 @@ def plot():
     return heatmap(diversity, df)
 
 
-def diversity_inter(text):
-    words = word_tokenize(text)
-    scores = get_scores(text)
-    interpret_values = [('', 0.0)]
-    for key, value in scores.items():
-        interpret_values.append((words[key], value))
-    interpret_values.append(('', 0.0))
-    print(interpret_values)
-    return {'original': text, 'interpretation': interpret_values}
-
 
 def sliding_window(text):
     words = word_tokenize(text)
@@ -374,25 +377,16 @@ def sliding_window(text):
     ax.set_facecolor('w')
     fig = plt.gcf()
 
-    map = [('', 0)]
+    mapd = [('', 0)]
     maxy = max(inter_scores)
     miny = min(inter_scores)
     spread = maxy - miny
 
     for idx, i in enumerate(words):
-        map.append((i, (inter_scores[idx] - miny) / spread))
-        map.append(('', 0))
-
-    return fig, map
-
-
-def get_plot(text):
-    return sliding_window(text)[0]
-
-
-def get_dif_inter(text):
-    return {'original': text, 'interpretation': sliding_window(text)[1]}
+        mapd.append((i, (inter_scores[idx] - miny) / spread))
+        mapd.append(('', 0))
 
+    return fig, {'original': text, 'interpretation': mapd}
 
 def speech_to_text(speech, target):
     text = p(speech)["text"]
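The renamed mapd list min-max scales the window scores into [0, 1] before sliding_window returns them as an interpretation payload; note that spread is zero whenever every window gets the same score, which the committed code does not guard against. A small sketch of the normalization with that edge case handled (the guard is an assumption, not in the diff):

    def normalize_windows(words, inter_scores):
        # Min-max scale each score into [0, 1] and interleave the empty
        # sentinels the way the app's difficulty heatmap expects.
        maxy, miny = max(inter_scores), min(inter_scores)
        spread = maxy - miny
        mapd = [('', 0)]
        for idx, word in enumerate(words):
            scaled = (inter_scores[idx] - miny) / spread if spread else 0.0
            mapd.append((word, scaled))
            mapd.append(('', 0))
        return mapd

    print(normalize_windows(["easy", "hard"], [0.2, 0.8]))
    # [('', 0), ('easy', 0.0), ('', 0), ('hard', 1.0), ('', 0)]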
@@ -460,6 +454,11 @@ with gr.Blocks(title="Automatic Literacy and Speech Assesmen") as demo:
         div_output = gr.Label(label='Diversity Score', show_label=False)
         gr.Markdown("Diversity Heatmap | Blue cells are omitted from score | Darker = More Diverse")
         interpretation = gr.components.Interpretation(in_text, label="Diversity Heatmap")
+
+        gr.Markdown("Find Similar Words | Word must be part of analysis text | Enter only one word at a time")
+        words1 = gr.Textbox(label="Word For Similarity")
+        find_sim = gr.Button("Find Similar Words")
+        sims = gr.Label()
     with gr.Box():
         gr.Markdown("Relative Difficulty Heatmap- How confusing the text is in that area")
         interpretation2 = gr.components.Interpretation(in_text, label="Difficulty Heatmap")
@@ -498,11 +497,10 @@ with gr.Blocks(title="Automatic Literacy and Speech Assesmen") as demo:
 
 
     grade.click(reading_difficulty, inputs=in_text, outputs=diff_output)
-    grade.click(get_mean_score, inputs=in_text, outputs=div_output)
-    grade.click(diversity_inter, inputs=in_text, outputs=interpretation)
-    grade.click(get_dif_inter, inputs=in_text, outputs=interpretation2)
-    grade.click(get_plot, inputs=in_text, outputs=plotter)
+    grade.click(get_mean_score, inputs=in_text, outputs=[interpretation, div_output])
+    grade.click(get_dif_inter, inputs=in_text, outputs=[plotter, interpretation2])
     grade1.click(speech_to_score, inputs=audio_file, outputs=diff_output)
     b1.click(speech_to_text, inputs=[audio_file1, target], outputs=[text, some_val, phones])
     get_syns.click(gen_syns, inputs=[words, lvl], outputs=reccos)
+    find_sim.click(get_sim_words, inputs=[in_text, words1], outputs=sims)
     demo.launch(debug=True)
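The rewired click handlers consolidate the work behind the "Optimizations" in the commit message: instead of four handlers that each recompute the analysis, get_mean_score and get_dif_inter now each drive a list of two output components, which in Gradio means the handler returns one value per component. A minimal, self-contained illustration of that multi-output pattern (the component and function names here are placeholders, not the app's):

    import gradio as gr

    def analyze(text):
        # One computation, two return values: one per output component below.
        words = text.split()
        return {"Word Count": float(len(words))}, ", ".join(words)

    with gr.Blocks() as demo:
        in_text = gr.Textbox(label="Text")
        grade = gr.Button("Grade")
        label_out = gr.Label()
        list_out = gr.Textbox(label="Tokens")
        grade.click(analyze, inputs=in_text, outputs=[label_out, list_out])

    # demo.launch()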
 