RobPruzan committed on
Commit f005347 · 1 Parent(s): b58b3e7

Optimizations and more interpretation

Files changed (1)
  1. app.py +38 -40
app.py CHANGED
@@ -66,6 +66,7 @@ def calculate_diversity(text):
     tokenized_text = word_tokenize(text)
 
     tokenized_text = list(map(lambda word: word.lower(), tokenized_text))
+    global sim_words
     sim_words = {}
     if len(tokenized_text) <= 1:
         return 1, "More Text Required"
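Why the commit adds global sim_words: the function assigns to sim_words, and the new get_sim_words callback introduced later in this diff reads it, so without the declaration the assignment would only create a function-local dict. A minimal sketch of the sharing pattern, assuming a module-level store (the explicit initialization and the toy fill loop are illustrative, not lines from the commit):

    sim_words = {}                      # module-level store read by the new callback

    def calculate_diversity(text):
        global sim_words                # rebind the shared name...
        sim_words = {}                  # ...instead of creating a local dict
        for idx, word in enumerate(text.split()):
            sim_words[idx] = []         # the app later fills these with similar words

    calculate_diversity("a toy sentence")
    print(sim_words)                    # {0: [], 1: [], 2: []}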
@@ -85,7 +86,7 @@ def calculate_diversity(text):
             if not comp.isalpha():
                 continue
             try:
-                if cosine_similarity(w2v[anc].reshape(1, -1), w2v[comp].reshape(1, -1)) > .7 or comp in wn_syns(anc):
+                if cosine_similarity(w2v[anc].reshape(1, -1), w2v[comp].reshape(1, -1)) > .75 or comp in wn_syns(anc):
                     vocab.append(comp)
             except KeyError:
                 continue
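The only change in this hunk is the similarity cutoff, raised from .7 to .75, which tightens what counts as a "similar" comparison word. A standalone sketch of the gate, assuming w2v behaves like a gensim keyed-vector lookup; wn_syns here is a hypothetical stand-in for the app's WordNet synonym helper:

    from sklearn.metrics.pairwise import cosine_similarity
    from nltk.corpus import wordnet as wn   # requires nltk's wordnet data

    def wn_syns(word):
        # Hypothetical stand-in: every lemma name across the word's synsets.
        return {lemma.name() for syn in wn.synsets(word) for lemma in syn.lemmas()}

    def is_similar(anc, comp, w2v, threshold=0.75):
        # cosine_similarity expects 2-D arrays, hence the reshape; a KeyError
        # means the word is missing from the embedding vocabulary.
        try:
            sim = cosine_similarity(w2v[anc].reshape(1, -1),
                                    w2v[comp].reshape(1, -1))[0][0]
        except KeyError:
            return False
        return sim > threshold or comp in wn_syns(anc)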
@@ -96,9 +97,6 @@ def calculate_diversity(text):
         if len(value) == 1:
             scores[key] = -1
             continue
-        # if len(value) == 2:
-        #     scores[key] = -1
-        #     continue
         t_sim = len(value)
         t_rep = (len(value)) - (len(set(value)))
 
@@ -108,24 +106,39 @@ def calculate_diversity(text):
 
     mean_score = 0
     total = 0
-
+
     for value in scores.values():
         if value == -1:
             continue
         mean_score += value
         total += 1
-    try:
-        return scores, {"Diversity Score": mean_score / total}
-    except ZeroDivisionError:
-        return scores, {"Dviersity Score": "Not Enough Data"}
+    words = word_tokenize(text)
 
+    interpret_values = [('', 0.0)]
+
+    for key, value in scores.items():
+        interpret_values.append((words[key], value))
 
-def get_scores(text):
-    return calculate_diversity(text)[0]
+    interpret_values.append(('', 0.0))
+    print(interpret_values)
+    int_vals = {'original': text, 'interpretation': interpret_values}
+    try:
+        return int_vals, {"Diversity Score": mean_score / total}
+    except ZeroDivisionError:
+        return int_vals, {"Dviersity Score": "Not Enough Data"}
 
 
-def get_mean_score(text):
-    return calculate_diversity(text)[1]
+def get_sim_words(text, word):
+    word = word.strip()
+    index = 0
+    text = word_tokenize(text)
+    print(sim_words)
+    for idx, i in enumerate(text):
+        if word == i:
+            index = idx
+            break
+    return ', '.join(sim_words[index])
 
 
 def dict_to_list(dictionary, max_size=10):
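calculate_diversity now builds the {'original': ..., 'interpretation': [(token, score), ...]} payload itself and returns it alongside the mean score, so a single call can feed both the heatmap and the label. The new get_sim_words reads the global sim_words populated by that call and falls back to token index 0 when the queried word is not found; a slightly more defensive variant (hypothetical, not part of the commit) might look like:

    from nltk.tokenize import word_tokenize

    sim_words = {}   # in the app this is the global filled by calculate_diversity()

    def get_sim_words_safe(text, word):
        # Hypothetical defensive variant of the commit's get_sim_words.
        word = word.strip().lower()
        if not sim_words:
            return "Run the diversity analysis first"
        for idx, token in enumerate(word_tokenize(text)):
            if token.lower() == word:
                matches = sim_words.get(idx, [])
                return ', '.join(matches) if matches else "No similar words stored"
        return "Word not found in the analysis text"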
@@ -314,16 +327,6 @@ def plot():
     return heatmap(diversity, df)
 
 
-def diversity_inter(text):
-    words = word_tokenize(text)
-    scores = get_scores(text)
-    interpret_values = [('', 0.0)]
-    for key, value in scores.items():
-        interpret_values.append((words[key], value))
-    interpret_values.append(('', 0.0))
-    print(interpret_values)
-    return {'original': text, 'interpretation': interpret_values}
-
 
 def sliding_window(text):
     words = word_tokenize(text)
@@ -374,25 +377,16 @@ def sliding_window(text):
     ax.set_facecolor('w')
     fig = plt.gcf()
 
-    map = [('', 0)]
+    mapd = [('', 0)]
     maxy = max(inter_scores)
     miny = min(inter_scores)
     spread = maxy - miny
 
     for idx, i in enumerate(words):
-        map.append((i, (inter_scores[idx] - miny) / spread))
-        map.append(('', 0))
-
-    return fig, map
-
-
-def get_plot(text):
-    return sliding_window(text)[0]
-
-
-def get_dif_inter(text):
-    return {'original': text, 'interpretation': sliding_window(text)[1]}
+        mapd.append((i, (inter_scores[idx] - miny) / spread))
+        mapd.append(('', 0))
 
+    return fig, {'original': text, 'interpretation': mapd}
 
 def speech_to_text(speech, target):
     text = p(speech)["text"]
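The renamed mapd list min-max scales the window scores into [0, 1] before sliding_window returns them as an interpretation payload; note that spread is zero whenever every window gets the same score, which the committed code does not guard against. A small sketch of the normalization with that edge case handled (the guard is an assumption, not in the diff):

    def normalize_windows(words, inter_scores):
        # Min-max scale each score into [0, 1] and interleave the empty
        # sentinels the way the app's difficulty heatmap expects.
        maxy, miny = max(inter_scores), min(inter_scores)
        spread = maxy - miny
        mapd = [('', 0)]
        for idx, word in enumerate(words):
            scaled = (inter_scores[idx] - miny) / spread if spread else 0.0
            mapd.append((word, scaled))
            mapd.append(('', 0))
        return mapd

    print(normalize_windows(["easy", "hard"], [0.2, 0.8]))
    # [('', 0), ('easy', 0.0), ('', 0), ('hard', 1.0), ('', 0)]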
@@ -460,6 +454,11 @@ with gr.Blocks(title="Automatic Literacy and Speech Assesmen") as demo:
         div_output = gr.Label(label='Diversity Score', show_label=False)
         gr.Markdown("Diversity Heatmap | Blue cells are omitted from score | Darker = More Diverse")
         interpretation = gr.components.Interpretation(in_text, label="Diversity Heatmap")
+
+        gr.Markdown("Find Similar Words | Word must be part of analysis text | Enter only one word at a time")
+        words1 = gr.Textbox(label="Word For Similarity")
+        find_sim = gr.Button("Find Similar Words")
+        sims = gr.Label()
     with gr.Box():
         gr.Markdown("Relative Difficulty Heatmap- How confusing the text is in that area")
         interpretation2 = gr.components.Interpretation(in_text, label="Difficulty Heatmap")
@@ -498,11 +497,10 @@ with gr.Blocks(title="Automatic Literacy and Speech Assesmen") as demo:
 
 
     grade.click(reading_difficulty, inputs=in_text, outputs=diff_output)
-    grade.click(get_mean_score, inputs=in_text, outputs=div_output)
-    grade.click(diversity_inter, inputs=in_text, outputs=interpretation)
-    grade.click(get_dif_inter, inputs=in_text, outputs=interpretation2)
-    grade.click(get_plot, inputs=in_text, outputs=plotter)
+    grade.click(get_mean_score, inputs=in_text, outputs=[interpretation, div_output])
+    grade.click(get_dif_inter, inputs=in_text, outputs=[plotter, interpretation2])
     grade1.click(speech_to_score, inputs=audio_file, outputs=diff_output)
     b1.click(speech_to_text, inputs=[audio_file1, target], outputs=[text, some_val, phones])
     get_syns.click(gen_syns, inputs=[words, lvl], outputs=reccos)
+    find_sim.click(get_sim_words, inputs=[in_text, words1], outputs=sims)
     demo.launch(debug=True)
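The rewired click handlers consolidate the work behind the "Optimizations" in the commit message: instead of four handlers that each recompute the analysis, get_mean_score and get_dif_inter now each drive a list of two output components, which in Gradio means the handler returns one value per component. A minimal, self-contained illustration of that multi-output pattern (the component and function names here are placeholders, not the app's):

    import gradio as gr

    def analyze(text):
        # One computation, two return values: one per output component below.
        words = text.split()
        return {"Word Count": float(len(words))}, ", ".join(words)

    with gr.Blocks() as demo:
        in_text = gr.Textbox(label="Text")
        grade = gr.Button("Grade")
        label_out = gr.Label()
        list_out = gr.Textbox(label="Tokens")
        grade.click(analyze, inputs=in_text, outputs=[label_out, list_out])

    # demo.launch()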
 