Spaces:
Runtime error
Runtime error
Optimizations and more interpretation
Browse files
app.py
CHANGED
@@ -66,6 +66,7 @@ def calculate_diversity(text):
|
|
66 |
tokenized_text = word_tokenize(text)
|
67 |
|
68 |
tokenized_text = list(map(lambda word: word.lower(), tokenized_text))
|
|
|
69 |
sim_words = {}
|
70 |
if len(tokenized_text) <= 1:
|
71 |
return 1, "More Text Required"
|
@@ -85,7 +86,7 @@ def calculate_diversity(text):
|
|
85 |
if not comp.isalpha():
|
86 |
continue
|
87 |
try:
|
88 |
-
if cosine_similarity(w2v[anc].reshape(1, -1), w2v[comp].reshape(1, -1)) > .
|
89 |
vocab.append(comp)
|
90 |
except KeyError:
|
91 |
continue
|
@@ -96,9 +97,6 @@ def calculate_diversity(text):
|
|
96 |
if len(value) == 1:
|
97 |
scores[key] = -1
|
98 |
continue
|
99 |
-
# if len(value) == 2:
|
100 |
-
# scores[key] = -1
|
101 |
-
# continue
|
102 |
t_sim = len(value)
|
103 |
t_rep = (len(value)) - (len(set(value)))
|
104 |
|
@@ -108,24 +106,39 @@ def calculate_diversity(text):
|
|
108 |
|
109 |
mean_score = 0
|
110 |
total = 0
|
111 |
-
|
112 |
for value in scores.values():
|
113 |
if value == -1:
|
114 |
continue
|
115 |
mean_score += value
|
116 |
total += 1
|
117 |
-
|
118 |
-
return scores, {"Diversity Score": mean_score / total}
|
119 |
-
except ZeroDivisionError:
|
120 |
-
return scores, {"Dviersity Score": "Not Enough Data"}
|
121 |
|
|
|
|
|
|
|
|
|
122 |
|
123 |
-
|
124 |
-
|
|
|
|
|
125 |
|
|
|
|
|
126 |
|
127 |
-
|
128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
|
130 |
|
131 |
def dict_to_list(dictionary, max_size=10):
|
@@ -314,16 +327,6 @@ def plot():
|
|
314 |
return heatmap(diversity, df)
|
315 |
|
316 |
|
317 |
-
def diversity_inter(text):
|
318 |
-
words = word_tokenize(text)
|
319 |
-
scores = get_scores(text)
|
320 |
-
interpret_values = [('', 0.0)]
|
321 |
-
for key, value in scores.items():
|
322 |
-
interpret_values.append((words[key], value))
|
323 |
-
interpret_values.append(('', 0.0))
|
324 |
-
print(interpret_values)
|
325 |
-
return {'original': text, 'interpretation': interpret_values}
|
326 |
-
|
327 |
|
328 |
def sliding_window(text):
|
329 |
words = word_tokenize(text)
|
@@ -374,25 +377,16 @@ def sliding_window(text):
|
|
374 |
ax.set_facecolor('w')
|
375 |
fig = plt.gcf()
|
376 |
|
377 |
-
|
378 |
maxy = max(inter_scores)
|
379 |
miny = min(inter_scores)
|
380 |
spread = maxy - miny
|
381 |
|
382 |
for idx, i in enumerate(words):
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
return fig, map
|
387 |
-
|
388 |
-
|
389 |
-
def get_plot(text):
|
390 |
-
return sliding_window(text)[0]
|
391 |
-
|
392 |
-
|
393 |
-
def get_dif_inter(text):
|
394 |
-
return {'original': text, 'interpretation': sliding_window(text)[1]}
|
395 |
|
|
|
396 |
|
397 |
def speech_to_text(speech, target):
|
398 |
text = p(speech)["text"]
|
@@ -460,6 +454,11 @@ with gr.Blocks(title="Automatic Literacy and Speech Assesmen") as demo:
|
|
460 |
div_output = gr.Label(label='Diversity Score', show_label=False)
|
461 |
gr.Markdown("Diversity Heatmap | Blue cells are omitted from score | Darker = More Diverse")
|
462 |
interpretation = gr.components.Interpretation(in_text, label="Diversity Heatmap")
|
|
|
|
|
|
|
|
|
|
|
463 |
with gr.Box():
|
464 |
gr.Markdown("Relative Difficulty Heatmap- How confusing the text is in that area")
|
465 |
interpretation2 = gr.components.Interpretation(in_text, label="Difficulty Heatmap")
|
@@ -498,11 +497,10 @@ with gr.Blocks(title="Automatic Literacy and Speech Assesmen") as demo:
|
|
498 |
|
499 |
|
500 |
grade.click(reading_difficulty, inputs=in_text, outputs=diff_output)
|
501 |
-
grade.click(get_mean_score, inputs=in_text, outputs=div_output)
|
502 |
-
grade.click(
|
503 |
-
grade.click(get_dif_inter, inputs=in_text, outputs=interpretation2)
|
504 |
-
grade.click(get_plot, inputs=in_text, outputs=plotter)
|
505 |
grade1.click(speech_to_score, inputs=audio_file, outputs=diff_output)
|
506 |
b1.click(speech_to_text, inputs=[audio_file1, target], outputs=[text, some_val, phones])
|
507 |
get_syns.click(gen_syns, inputs=[words, lvl], outputs=reccos)
|
|
|
508 |
demo.launch(debug=True)
|
|
|
66 |
tokenized_text = word_tokenize(text)
|
67 |
|
68 |
tokenized_text = list(map(lambda word: word.lower(), tokenized_text))
|
69 |
+
global sim_words
|
70 |
sim_words = {}
|
71 |
if len(tokenized_text) <= 1:
|
72 |
return 1, "More Text Required"
|
|
|
86 |
if not comp.isalpha():
|
87 |
continue
|
88 |
try:
|
89 |
+
if cosine_similarity(w2v[anc].reshape(1, -1), w2v[comp].reshape(1, -1)) > .75 or comp in wn_syns(anc):
|
90 |
vocab.append(comp)
|
91 |
except KeyError:
|
92 |
continue
|
|
|
97 |
if len(value) == 1:
|
98 |
scores[key] = -1
|
99 |
continue
|
|
|
|
|
|
|
100 |
t_sim = len(value)
|
101 |
t_rep = (len(value)) - (len(set(value)))
|
102 |
|
|
|
106 |
|
107 |
mean_score = 0
|
108 |
total = 0
|
109 |
+
|
110 |
for value in scores.values():
|
111 |
if value == -1:
|
112 |
continue
|
113 |
mean_score += value
|
114 |
total += 1
|
115 |
+
words = word_tokenize(text)
|
|
|
|
|
|
|
116 |
|
117 |
+
interpret_values = [('', 0.0)]
|
118 |
+
|
119 |
+
for key, value in scores.items():
|
120 |
+
interpret_values.append((words[key], value))
|
121 |
|
122 |
+
interpret_values.append(('', 0.0))
|
123 |
+
print(interpret_values)
|
124 |
+
int_vals = {'original': text, 'interpretation': interpret_values}
|
125 |
+
try:
|
126 |
|
127 |
+
return int_vals, {"Diversity Score": mean_score / total}
|
128 |
+
except ZeroDivisionError:
|
129 |
|
130 |
+
return int_vals, {"Dviersity Score": "Not Enough Data"}
|
131 |
+
|
132 |
+
def get_sim_words(text, word):
    """Return the similar words recorded for the first occurrence of ``word``.

    ``text`` is tokenized with ``word_tokenize`` and searched for the first
    token equal to ``word`` (after stripping surrounding whitespace; the
    comparison is case-sensitive). The position of that token is then looked
    up in the module-level ``sim_words`` mapping, which ``calculate_diversity``
    resets on every run.
    NOTE(review): ``sim_words`` is presumably keyed by token index and holds
    an iterable of strings per entry -- confirm against ``calculate_diversity``.

    Args:
        text: The analysis text the word is expected to appear in.
        word: A single word to look up.

    Returns:
        A comma-separated string of similar words, or a short explanatory
        message when the word is not in ``text`` or has no recorded entry.
    """
    word = word.strip()
    tokens = word_tokenize(text)

    # Previously a missing word fell through with index 0 and silently
    # returned the similar words of the *first* token in the text.
    try:
        index = tokens.index(word)
    except ValueError:
        return "Word not found in text"

    # Not every token position necessarily has an entry; avoid a KeyError
    # surfacing as an error in the UI.
    similar = sim_words.get(index)
    if similar is None:
        return "No similar words found"
    return ', '.join(similar)
|
142 |
|
143 |
|
144 |
def dict_to_list(dictionary, max_size=10):
|
|
|
327 |
return heatmap(diversity, df)
|
328 |
|
329 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
330 |
|
331 |
def sliding_window(text):
|
332 |
words = word_tokenize(text)
|
|
|
377 |
ax.set_facecolor('w')
|
378 |
fig = plt.gcf()
|
379 |
|
380 |
+
mapd = [('', 0)]
|
381 |
maxy = max(inter_scores)
|
382 |
miny = min(inter_scores)
|
383 |
spread = maxy - miny
|
384 |
|
385 |
for idx, i in enumerate(words):
|
386 |
+
mapd.append((i, (inter_scores[idx] - miny) / spread))
|
387 |
+
mapd.append(('', 0))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
388 |
|
389 |
+
return fig, {'original': text, 'interpretation': mapd}
|
390 |
|
391 |
def speech_to_text(speech, target):
|
392 |
text = p(speech)["text"]
|
|
|
454 |
div_output = gr.Label(label='Diversity Score', show_label=False)
|
455 |
gr.Markdown("Diversity Heatmap | Blue cells are omitted from score | Darker = More Diverse")
|
456 |
interpretation = gr.components.Interpretation(in_text, label="Diversity Heatmap")
|
457 |
+
|
458 |
+
gr.Markdown("Find Similar Words | Word must be part of analysis text | Enter only one word at a time")
|
459 |
+
words1 = gr.Textbox(label="Word For Similarity")
|
460 |
+
find_sim = gr.Button("Find Similar Words")
|
461 |
+
sims = gr.Label()
|
462 |
with gr.Box():
|
463 |
gr.Markdown("Relative Difficulty Heatmap- How confusing the text is in that area")
|
464 |
interpretation2 = gr.components.Interpretation(in_text, label="Difficulty Heatmap")
|
|
|
497 |
|
498 |
|
499 |
grade.click(reading_difficulty, inputs=in_text, outputs=diff_output)
|
500 |
+
grade.click(get_mean_score, inputs=in_text, outputs=[interpretation, div_output])
|
501 |
+
grade.click(get_dif_inter, inputs=in_text, outputs=[plotter, interpretation2])
|
|
|
|
|
502 |
grade1.click(speech_to_score, inputs=audio_file, outputs=diff_output)
|
503 |
b1.click(speech_to_text, inputs=[audio_file1, target], outputs=[text, some_val, phones])
|
504 |
get_syns.click(gen_syns, inputs=[words, lvl], outputs=reccos)
|
505 |
+
find_sim.click(get_sim_words, inputs=[in_text, words1], outputs=sims)
|
506 |
demo.launch(debug=True)
|