Spaces:

GenSEC-LLM
/

Post-ASR-LLM-Transcription-Correction

Running

huckiyang committed on Mar 14

Commit

fbba242

1 Parent(s): ba0ed8b

fix headers

Files changed (1) hide show

app.py CHANGED Viewed

@@ -14,10 +14,27 @@ def calculate_wer(examples):
     if not examples:
         return 0.0
-    hypotheses = [ex["hypothesis_concatenated"].split('.')[0].strip() for ex in examples]
-    transcriptions = [ex["transcription"].strip() for ex in examples]
-    wer = jiwer.wer(transcriptions, hypotheses)
     return wer
 # Get WER metrics by source and split

     if not examples:
         return 0.0
+    valid_pairs = []
+    for ex in examples:
+        # Get transcription and input1 fields
+        transcription = ex.get("transcription")
+        input1 = ex.get("input1")
+        # Only include examples where both fields exist and are not empty
+        if transcription and input1:
+            valid_pairs.append((transcription.strip(), input1.strip()))
+    # If no valid pairs were found, return NaN
+    if not valid_pairs:
+        return np.nan
+    # Separate references and hypotheses
+    references = [pair[0] for pair in valid_pairs]
+    hypotheses = [pair[1] for pair in valid_pairs]
+    # Calculate WER
+    wer = jiwer.wer(references, hypotheses)
     return wer
 # Get WER metrics by source and split