Spaces:

Slamlab
/

asr_metrics

Sleeping

App Files Files Community

akki2825 committed on Jul 3

Commit

dd10290

verified ·

1 Parent(s): ea825ae

misalignment at the word level

Browse files

Files changed (1) hide show

app.py +18 -20

app.py CHANGED Viewed

@@ -82,31 +82,29 @@ def calculate_sentence_metrics(reference, hypothesis):
 def identify_misaligned_sentences(reference_text, hypothesis_text):
     """
     Identify sentences that don't match between reference and hypothesis.
-    Handles cases where the number of sentences differ.
     Returns a dictionary with misaligned sentence pairs, their indices, and misalignment details.
     """
     reference_sentences = split_into_sentences(reference_text)
     hypothesis_sentences = split_into_sentences(hypothesis_text)
     misaligned = []
-    min_length = min(len(reference_sentences), len(hypothesis_sentences))
-    # Compare sentences up to the minimum length
-    for i in range(min_length):
-        ref = reference_sentences[i]
-        hyp = hypothesis_sentences[i]
         if ref != hyp:
-            # Find the first position where the sentences diverge
-            min_len = min(len(ref), len(hyp))
             misalignment_start = 0
-            for j in range(min_len):
-                if ref[j] != hyp[j]:
                     misalignment_start = j
                     break
             # Prepare the context for display
-            context_ref = ref[:misalignment_start] + f"**{ref[misalignment_start:]}**"
-            context_hyp = hyp[:misalignment_start] + f"**{hyp[misalignment_start:]}**"
             misaligned.append({
                 "index": i+1,
@@ -117,9 +115,9 @@ def identify_misaligned_sentences(reference_text, hypothesis_text):
                 "context_hyp": context_hyp
             })
-    # Note any extra sentences as misaligned
     if len(reference_sentences) > len(hypothesis_sentences):
-        for i in range(min_length, len(reference_sentences)):
             misaligned.append({
                 "index": i+1,
                 "reference": reference_sentences[i],
@@ -129,7 +127,7 @@ def identify_misaligned_sentences(reference_text, hypothesis_text):
                 "context_hyp": "No corresponding sentence"
             })
     elif len(hypothesis_sentences) > len(reference_sentences):
-        for i in range(min_length, len(hypothesis_sentences)):
             misaligned.append({
                 "index": i+1,
                 "reference": "No corresponding sentence",
@@ -162,9 +160,9 @@ def format_sentence_metrics(sentence_wers, sentence_cers, average_wer, average_c
         md += "\n### Misaligned Sentences\n\n"
         for misaligned in misaligned_sentences:
             md += f"#### Sentence {misaligned['index']}\n"
-            md += f"* Reference: {misaligned['reference']}\n"
-            md += f"* Hypothesis: {misaligned['hypothesis']}\n"
-            md += f"* Misalignment starts at position: {misaligned['misalignment_start']}\n\n"
     else:
         md += "\n### Misaligned Sentences\n\n"
         md += "* No misaligned sentences found."

 def identify_misaligned_sentences(reference_text, hypothesis_text):
     """
     Identify sentences that don't match between reference and hypothesis.
     Returns a dictionary with misaligned sentence pairs, their indices, and misalignment details.
     """
     reference_sentences = split_into_sentences(reference_text)
     hypothesis_sentences = split_into_sentences(hypothesis_text)
     misaligned = []
+    for i, (ref, hyp) in enumerate(zip(reference_sentences, hypothesis_sentences)):
         if ref != hyp:
+            # Split sentences into words
+            ref_words = ref.split()
+            hyp_words = hyp.split()
+            # Find the first position where the words diverge
+            min_length = min(len(ref_words), len(hyp_words))
             misalignment_start = 0
+            for j in range(min_length):
+                if ref_words[j] != hyp_words[j]:
                     misalignment_start = j
                     break
             # Prepare the context for display
+            context_ref = ' '.join(ref_words[:misalignment_start] + ['**' + ref_words[misalignment_start] + '**'])
+            context_hyp = ' '.join(hyp_words[:misalignment_start] + ['**' + hyp_words[misalignment_start] + '**'])
             misaligned.append({
                 "index": i+1,
                 "context_hyp": context_hyp
             })
+    # Handle cases where the number of sentences differs
     if len(reference_sentences) > len(hypothesis_sentences):
+        for i in range(len(hypothesis_sentences), len(reference_sentences)):
             misaligned.append({
                 "index": i+1,
                 "reference": reference_sentences[i],
                 "context_hyp": "No corresponding sentence"
             })
     elif len(hypothesis_sentences) > len(reference_sentences):
+        for i in range(len(reference_sentences), len(hypothesis_sentences)):
             misaligned.append({
                 "index": i+1,
                 "reference": "No corresponding sentence",
         md += "\n### Misaligned Sentences\n\n"
         for misaligned in misaligned_sentences:
             md += f"#### Sentence {misaligned['index']}\n"
+            md += f"* Reference: {misaligned['context_ref']}\n"
+            md += f"* Hypothesis: {misaligned['context_hyp']}\n"
+            md += f"* Misalignment starts at word: {misaligned['misalignment_start'] + 1}\n\n"
     else:
         md += "\n### Misaligned Sentences\n\n"
         md += "* No misaligned sentences found."