Hugging Face Space (status: sleeping)
Commit: "fix readlines issue" — Browse files
File: app.py (changed)
@@ -1,112 +1,71 @@
|
|
1 |
-
import spaces
|
2 |
import jiwer
|
3 |
import numpy as np
|
4 |
-
import re
|
5 |
import gradio as gr
|
6 |
|
7 |
-
def split_into_sentences(text):
|
8 |
-
"""
|
9 |
-
Simple sentence tokenizer using regular expressions.
|
10 |
-
Splits text into sentences based on punctuation.
|
11 |
-
"""
|
12 |
-
sentences = re.split(r'(?<=[.!?])\s+', text)
|
13 |
-
sentences = [s.strip() for s in sentences if s.strip()]
|
14 |
-
return sentences
|
15 |
-
|
16 |
-
@spaces.GPU()
|
17 |
def calculate_wer(reference, hypothesis):
|
18 |
-
""
|
19 |
-
|
20 |
-
|
21 |
-
wer = jiwer.wer(reference, hypothesis)
|
22 |
-
return wer
|
23 |
|
24 |
-
@spaces.GPU()
|
25 |
def calculate_cer(reference, hypothesis):
|
26 |
-
""
|
27 |
-
|
28 |
-
|
29 |
-
cer = jiwer.cer(reference, hypothesis)
|
30 |
-
return cer
|
31 |
|
32 |
-
@spaces.GPU()
|
33 |
def calculate_sentence_metrics(reference, hypothesis):
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
if sentence_cers:
|
64 |
-
average_cer = np.mean(sentence_cers)
|
65 |
-
std_dev_cer = np.std(sentence_cers)
|
66 |
-
else:
|
67 |
-
average_cer = 0.0
|
68 |
-
std_dev_cer = 0.0
|
69 |
-
|
70 |
-
return {
|
71 |
-
"sentence_wers": sentence_wers,
|
72 |
-
"sentence_cers": sentence_cers,
|
73 |
-
"average_wer": average_wer,
|
74 |
-
"average_cer": average_cer,
|
75 |
-
"std_dev_wer": std_dev_wer,
|
76 |
-
"std_dev_cer": std_dev_cer
|
77 |
-
}
|
78 |
-
except Exception as e:
|
79 |
-
raise e
|
80 |
|
81 |
-
def identify_misaligned_sentences(
|
82 |
-
|
83 |
-
|
84 |
-
Returns a dictionary with misaligned sentence pairs, their indices, and misalignment details.
|
85 |
-
"""
|
86 |
-
reference_sentences = reference_text
|
87 |
-
hypothesis_sentences = hypothesis_text
|
88 |
|
89 |
misaligned = []
|
|
|
90 |
for i, (ref, hyp) in enumerate(zip(reference_sentences, hypothesis_sentences)):
|
91 |
if ref != hyp:
|
92 |
-
# Split sentences into words
|
93 |
ref_words = ref.split()
|
94 |
hyp_words = hyp.split()
|
95 |
-
|
96 |
-
# Find the first position where the words diverge
|
97 |
min_length = min(len(ref_words), len(hyp_words))
|
|
|
98 |
misalignment_start = 0
|
99 |
for j in range(min_length):
|
100 |
if ref_words[j] != hyp_words[j]:
|
101 |
misalignment_start = j
|
102 |
break
|
103 |
|
104 |
-
|
105 |
-
|
106 |
-
context_hyp = ' '.join(hyp_words[:misalignment_start] + ['**' + hyp_words[misalignment_start] + '**'])
|
107 |
|
108 |
misaligned.append({
|
109 |
-
"index": i+1,
|
110 |
"reference": ref,
|
111 |
"hypothesis": hyp,
|
112 |
"misalignment_start": misalignment_start,
|
@@ -114,11 +73,11 @@ def identify_misaligned_sentences(reference_text, hypothesis_text):
|
|
114 |
"context_hyp": context_hyp
|
115 |
})
|
116 |
|
117 |
-
# Handle
|
118 |
if len(reference_sentences) > len(hypothesis_sentences):
|
119 |
for i in range(len(hypothesis_sentences), len(reference_sentences)):
|
120 |
misaligned.append({
|
121 |
-
"index": i+1,
|
122 |
"reference": reference_sentences[i],
|
123 |
"hypothesis": "No corresponding sentence",
|
124 |
"misalignment_start": 0,
|
@@ -128,7 +87,7 @@ def identify_misaligned_sentences(reference_text, hypothesis_text):
|
|
128 |
elif len(hypothesis_sentences) > len(reference_sentences):
|
129 |
for i in range(len(reference_sentences), len(hypothesis_sentences)):
|
130 |
misaligned.append({
|
131 |
-
"index": i+1,
|
132 |
"reference": "No corresponding sentence",
|
133 |
"hypothesis": hypothesis_sentences[i],
|
134 |
"misalignment_start": 0,
|
@@ -138,45 +97,27 @@ def identify_misaligned_sentences(reference_text, hypothesis_text):
|
|
138 |
|
139 |
return misaligned
|
140 |
|
141 |
-
|
142 |
-
def format_sentence_metrics(sentence_wers, sentence_cers, average_wer, average_cer, std_dev_wer, std_dev_cer, misaligned_sentences):
|
143 |
md = "### Sentence-level Metrics\n\n"
|
144 |
-
md += "
|
145 |
-
md += f"
|
146 |
-
md += f"
|
147 |
-
md += "
|
148 |
-
|
149 |
-
md +=
|
150 |
-
|
151 |
-
md += "### WER for Each Sentence\n\n"
|
152 |
for i, wer in enumerate(sentence_wers):
|
153 |
-
md += f"
|
154 |
|
155 |
-
md += "\n
|
156 |
for i, cer in enumerate(sentence_cers):
|
157 |
-
md += f"
|
158 |
-
|
159 |
-
# if misaligned_sentences:
|
160 |
-
# md += "\n### Misaligned Sentences\n\n"
|
161 |
-
# for misaligned in misaligned_sentences:
|
162 |
-
# md += f"#### Sentence {misaligned['index']}\n"
|
163 |
-
# md += f"* Reference: {misaligned['context_ref']}\n"
|
164 |
-
# md += f"* Hypothesis: {misaligned['context_hyp']}\n"
|
165 |
-
# md += f"* Misalignment starts at word: {misaligned['misalignment_start'] + 1}\n\n"
|
166 |
-
# else:
|
167 |
-
# md += "\n### Misaligned Sentences\n\n"
|
168 |
-
# md += "* No misaligned sentences found."
|
169 |
|
170 |
return md
|
171 |
|
172 |
-
@spaces.GPU()
|
173 |
def process_files(reference_file, hypothesis_file):
|
174 |
try:
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
with open(hypothesis_file.name, 'r') as f:
|
179 |
-
hypothesis_text = f.readlines()
|
180 |
|
181 |
overall_wer = calculate_wer(reference_text, hypothesis_text)
|
182 |
overall_cer = calculate_cer(reference_text, hypothesis_text)
|
@@ -186,24 +127,17 @@ def process_files(reference_file, hypothesis_file):
|
|
186 |
return {
|
187 |
"Overall WER": overall_wer,
|
188 |
"Overall CER": overall_cer,
|
189 |
-
|
190 |
-
"Sentence CERs": sentence_metrics["sentence_cers"],
|
191 |
-
"Average WER": sentence_metrics["average_wer"],
|
192 |
-
"Average CER": sentence_metrics["average_cer"],
|
193 |
-
"Standard Deviation WER": sentence_metrics["std_dev_wer"],
|
194 |
-
"Standard Deviation CER": sentence_metrics["std_dev_cer"],
|
195 |
"Misaligned Sentences": misaligned
|
196 |
}
|
197 |
except Exception as e:
|
198 |
return {"error": str(e)}
|
199 |
|
200 |
-
|
201 |
def process_and_display(ref_file, hyp_file):
|
202 |
result = process_files(ref_file, hyp_file)
|
203 |
|
204 |
if "error" in result:
|
205 |
-
|
206 |
-
return {"error": error_msg}, "", ""
|
207 |
|
208 |
metrics = {
|
209 |
"Overall WER": result["Overall WER"],
|
@@ -211,46 +145,21 @@ def process_and_display(ref_file, hyp_file):
|
|
211 |
}
|
212 |
|
213 |
metrics_md = format_sentence_metrics(
|
214 |
-
result["
|
215 |
-
result["
|
216 |
-
result["
|
217 |
-
result["
|
218 |
-
result["
|
219 |
-
result["
|
220 |
-
result["Misaligned Sentences"]
|
221 |
-
)
|
222 |
-
|
223 |
-
return metrics, metrics_md
|
224 |
-
|
225 |
-
def process_and_display(ref_file, hyp_file):
|
226 |
-
result = process_files(ref_file, hyp_file)
|
227 |
-
|
228 |
-
if "error" in result:
|
229 |
-
error_msg = result["error"]
|
230 |
-
return {"error": error_msg}, "", ""
|
231 |
-
|
232 |
-
metrics = {
|
233 |
-
"Overall WER": result["Overall WER"],
|
234 |
-
"Overall CER": result["Overall CER"]
|
235 |
-
}
|
236 |
-
|
237 |
-
metrics_md = format_sentence_metrics(
|
238 |
-
result["Sentence WERs"],
|
239 |
-
result["Sentence CERs"],
|
240 |
-
result["Average WER"],
|
241 |
-
result["Average CER"],
|
242 |
-
result["Standard Deviation WER"],
|
243 |
-
result["Standard Deviation CER"],
|
244 |
-
result["Misaligned Sentences"]
|
245 |
)
|
246 |
|
247 |
misaligned_md = "### Misaligned Sentences\n\n"
|
248 |
if result["Misaligned Sentences"]:
|
249 |
-
for
|
250 |
-
misaligned_md += f"
|
251 |
-
misaligned_md += f"
|
252 |
-
misaligned_md += f"
|
253 |
-
misaligned_md += f"
|
254 |
else:
|
255 |
misaligned_md += "* No misaligned sentences found."
|
256 |
|
@@ -258,25 +167,19 @@ def process_and_display(ref_file, hyp_file):
|
|
258 |
|
259 |
def main():
|
260 |
with gr.Blocks() as demo:
|
261 |
-
gr.Markdown("# ASR Metrics Analysis")
|
262 |
|
263 |
-
with gr.Row()
|
264 |
with gr.Column():
|
265 |
-
gr.Markdown("###
|
266 |
-
gr.
|
267 |
-
gr.
|
268 |
-
gr.
|
269 |
-
|
270 |
-
with gr.Row():
|
271 |
-
reference_file = gr.File(label="Upload Reference File")
|
272 |
-
hypothesis_file = gr.File(label="Upload Model Output File")
|
273 |
-
|
274 |
-
with gr.Row():
|
275 |
-
compute_button = gr.Button("Compute Metrics", variant="primary")
|
276 |
-
results_output = gr.JSON(label="Results")
|
277 |
-
metrics_output = gr.Markdown(label="Metrics")
|
278 |
-
misaligned_output = gr.Markdown(label="Misaligned Sentences")
|
279 |
|
|
|
|
|
|
|
|
|
280 |
|
281 |
compute_button.click(
|
282 |
fn=process_and_display,
|
@@ -287,4 +190,4 @@ def main():
|
|
287 |
demo.launch()
|
288 |
|
289 |
if __name__ == "__main__":
|
290 |
-
main()
|
|
|
|
|
1 |
import jiwer
|
2 |
import numpy as np
|
|
|
3 |
import gradio as gr
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
def calculate_wer(reference, hypothesis):
    """Return the overall word error rate between two lists of lines.

    Each list of lines is flattened into one space-separated string before
    the comparison, so line boundaries carry no special weight in the score.
    """
    flat_reference = " ".join(reference)
    flat_hypothesis = " ".join(hypothesis)
    return jiwer.wer(flat_reference, flat_hypothesis)
|
|
|
|
|
9 |
|
|
|
10 |
def calculate_cer(reference, hypothesis):
    """Return the overall character error rate between two lists of lines.

    Mirrors calculate_wer: both line lists are collapsed into single
    space-joined strings and handed to jiwer's CER implementation.
    """
    return jiwer.cer(" ".join(reference), " ".join(hypothesis))
|
|
|
|
|
14 |
|
|
|
15 |
def calculate_sentence_metrics(reference, hypothesis):
    """Compute per-sentence WER/CER plus their means and standard deviations.

    Sentences are paired positionally; trailing unmatched lines on the
    longer side are ignored (zip truncates to the shorter list, matching
    the original explicit min-length loop).

    Returns a dict with keys: sentence_wers, sentence_cers, average_wer,
    average_cer, std_dev_wer, std_dev_cer.
    """
    ref_lines = [line.strip() for line in reference]
    hyp_lines = [line.strip() for line in hypothesis]

    sentence_wers = []
    sentence_cers = []
    for ref_line, hyp_line in zip(ref_lines, hyp_lines):
        sentence_wers.append(jiwer.wer(ref_line, hyp_line))
        sentence_cers.append(jiwer.cer(ref_line, hyp_line))

    # Guard the empty case so np.mean/np.std never see an empty list.
    average_wer = np.mean(sentence_wers) if sentence_wers else 0.0
    std_dev_wer = np.std(sentence_wers) if sentence_wers else 0.0
    average_cer = np.mean(sentence_cers) if sentence_cers else 0.0
    std_dev_cer = np.std(sentence_cers) if sentence_cers else 0.0

    return {
        "sentence_wers": sentence_wers,
        "sentence_cers": sentence_cers,
        "average_wer": average_wer,
        "average_cer": average_cer,
        "std_dev_wer": std_dev_wer,
        "std_dev_cer": std_dev_cer
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
+
def identify_misaligned_sentences(reference, hypothesis):
    """Find positionally paired sentences that differ between two line lists.

    For each differing pair the first diverging word is wrapped in ``**`` so
    the Markdown renderer bolds it. Unpaired trailing sentences (when the
    lists have different lengths) are reported with a
    "No corresponding sentence" placeholder.

    Returns a list of dicts, each with keys: index (1-based), reference,
    hypothesis, misalignment_start, context_ref, context_hyp. Every entry
    carries context_ref/context_hyp because the Markdown report reads both
    fields unconditionally.
    """
    reference_sentences = [line.strip() for line in reference]
    hypothesis_sentences = [line.strip() for line in hypothesis]

    misaligned = []

    for i, (ref, hyp) in enumerate(zip(reference_sentences, hypothesis_sentences)):
        if ref != hyp:
            ref_words = ref.split()
            hyp_words = hyp.split()
            min_length = min(len(ref_words), len(hyp_words))

            # First word position where the two sentences diverge. Stays 0
            # when one sentence is a word-level prefix of the other.
            misalignment_start = 0
            for j in range(min_length):
                if ref_words[j] != hyp_words[j]:
                    misalignment_start = j
                    break

            # Context strings: words up to the divergence point, with the
            # first diverging word bolded for the Markdown report. The
            # empty-list guard avoids indexing into a blank sentence.
            context_ref = ' '.join(ref_words[:misalignment_start] + ['**' + ref_words[misalignment_start] + '**']) if ref_words else ""
            context_hyp = ' '.join(hyp_words[:misalignment_start] + ['**' + hyp_words[misalignment_start] + '**']) if hyp_words else ""

            misaligned.append({
                "index": i + 1,
                "reference": ref,
                "hypothesis": hyp,
                "misalignment_start": misalignment_start,
                "context_ref": context_ref,
                "context_hyp": context_hyp
            })

    # Handle extra sentences when one list is longer than the other.
    if len(reference_sentences) > len(hypothesis_sentences):
        for i in range(len(hypothesis_sentences), len(reference_sentences)):
            misaligned.append({
                "index": i + 1,
                "reference": reference_sentences[i],
                "hypothesis": "No corresponding sentence",
                "misalignment_start": 0,
                "context_ref": reference_sentences[i],
                "context_hyp": "No corresponding sentence"
            })
    elif len(hypothesis_sentences) > len(reference_sentences):
        for i in range(len(reference_sentences), len(hypothesis_sentences)):
            misaligned.append({
                "index": i + 1,
                "reference": "No corresponding sentence",
                "hypothesis": hypothesis_sentences[i],
                "misalignment_start": 0,
                "context_ref": "No corresponding sentence",
                "context_hyp": hypothesis_sentences[i]
            })

    return misaligned
|
99 |
|
100 |
+
def format_sentence_metrics(sentence_wers, sentence_cers, average_wer, average_cer, std_dev_wer, std_dev_cer):
    """Render per-sentence WER/CER statistics as a Markdown report.

    The report shows the aggregate mean/std-dev figures first, then one
    bullet per sentence for WER and for CER (two decimal places throughout).
    """
    parts = [
        "### Sentence-level Metrics\n\n",
        f"**Average WER**: {average_wer:.2f}\n\n",
        f"**Standard Deviation WER**: {std_dev_wer:.2f}\n\n",
        f"**Average CER**: {average_cer:.2f}\n\n",
        f"**Standard Deviation CER**: {std_dev_cer:.2f}\n\n",
        "---\n**WER by Sentence**\n",
    ]
    parts.extend(f"- Sentence {idx}: {score:.2f}\n"
                 for idx, score in enumerate(sentence_wers, start=1))
    parts.append("\n**CER by Sentence**\n")
    parts.extend(f"- Sentence {idx}: {score:.2f}\n"
                 for idx, score in enumerate(sentence_cers, start=1))
    return "".join(parts)
|
116 |
|
|
|
117 |
def process_files(reference_file, hypothesis_file):
    """Read the two uploaded files and compute all ASR metrics.

    Returns a dict with overall WER/CER, the per-sentence metric fields
    from calculate_sentence_metrics (merged in via **), and the
    misaligned-sentence report. On any failure returns
    {"error": <message>} instead of raising, so the UI can display it.
    """
    try:
        # Decode raw bytes then split into lines; splitlines() avoids the
        # trailing-"\n" artifacts readlines() would leave on each line.
        # NOTE(review): assumes the upload object exposes .read() returning
        # bytes — confirm against the installed gradio version.
        reference_text = reference_file.read().decode("utf-8").splitlines()
        hypothesis_text = hypothesis_file.read().decode("utf-8").splitlines()

        overall_wer = calculate_wer(reference_text, hypothesis_text)
        overall_cer = calculate_cer(reference_text, hypothesis_text)
        sentence_metrics = calculate_sentence_metrics(reference_text, hypothesis_text)
        misaligned = identify_misaligned_sentences(reference_text, hypothesis_text)

        return {
            "Overall WER": overall_wer,
            "Overall CER": overall_cer,
            **sentence_metrics,
            "Misaligned Sentences": misaligned
        }
    except Exception as e:
        return {"error": str(e)}
|
135 |
|
|
|
136 |
def process_and_display(ref_file, hyp_file):
    """Gradio callback: turn two uploaded files into the three UI outputs.

    Returns a 3-tuple: (summary dict for the JSON panel, sentence-metrics
    Markdown, misaligned-sentences Markdown). On error the two Markdown
    slots are returned empty, matching the error-path shape below.
    """
    result = process_files(ref_file, hyp_file)

    if "error" in result:
        return {"error": result["error"]}, "", ""

    metrics = {
        "Overall WER": result["Overall WER"],
        "Overall CER": result["Overall CER"]
    }

    metrics_md = format_sentence_metrics(
        result["sentence_wers"],
        result["sentence_cers"],
        result["average_wer"],
        result["average_cer"],
        result["std_dev_wer"],
        result["std_dev_cer"]
    )

    misaligned_md = "### Misaligned Sentences\n\n"
    if result["Misaligned Sentences"]:
        for mis in result["Misaligned Sentences"]:
            misaligned_md += f"**Sentence {mis['index']}**\n"
            misaligned_md += f"- Reference: {mis['context_ref']}\n"
            misaligned_md += f"- Hypothesis: {mis['context_hyp']}\n"
            misaligned_md += f"- Misalignment starts at position: {mis['misalignment_start']}\n\n"
    else:
        misaligned_md += "* No misaligned sentences found."

    return metrics, metrics_md, misaligned_md
|
167 |
|
168 |
def main():
    """Build the Gradio Blocks UI and launch the app.

    Left column: file uploads + compute button; right column: JSON summary,
    sentence-metrics Markdown, and misaligned-sentences Markdown.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# 📊 ASR Metrics Analysis Tool")

        with gr.Row():
            with gr.Column():
                gr.Markdown("### Upload your reference and hypothesis files")
                reference_file = gr.File(label="Reference File (.txt)")
                hypothesis_file = gr.File(label="Hypothesis File (.txt)")
                compute_button = gr.Button("Compute Metrics", variant="primary")

            with gr.Column():
                results_output = gr.JSON(label="Results Summary")
                metrics_output = gr.Markdown(label="Sentence Metrics")
                misaligned_output = gr.Markdown(label="Misaligned Sentences")

        # NOTE(review): the inputs/outputs wiring was collapsed in the diff
        # view; reconstructed from the components defined above and the
        # 3-tuple returned by process_and_display — confirm against the repo.
        compute_button.click(
            fn=process_and_display,
            inputs=[reference_file, hypothesis_file],
            outputs=[results_output, metrics_output, misaligned_output],
        )

    demo.launch()
|
191 |
|
192 |
if __name__ == "__main__":
    # Script entry point: build and launch the Gradio UI.
    main()
|