huckiyang committed on
Commit
fbba242
·
1 Parent(s): ba0ed8b

fix headers

Browse files
Files changed (1) hide show
  1. app.py +20 -3
app.py CHANGED
@@ -14,10 +14,27 @@ def calculate_wer(examples):
14
  if not examples:
15
  return 0.0
16
 
17
- hypotheses = [ex["hypothesis_concatenated"].split('.')[0].strip() for ex in examples]
18
- transcriptions = [ex["transcription"].strip() for ex in examples]
19
 
20
- wer = jiwer.wer(transcriptions, hypotheses)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  return wer
22
 
23
  # Get WER metrics by source and split
 
14
  if not examples:
15
  return 0.0
16
 
17
+ valid_pairs = []
 
18
 
19
+ for ex in examples:
20
+ # Get transcription and input1 fields
21
+ transcription = ex.get("transcription")
22
+ input1 = ex.get("input1")
23
+
24
+ # Only include examples where both fields exist and are not empty
25
+ if transcription and input1:
26
+ valid_pairs.append((transcription.strip(), input1.strip()))
27
+
28
+ # If no valid pairs were found, return NaN
29
+ if not valid_pairs:
30
+ return np.nan
31
+
32
+ # Separate references and hypotheses
33
+ references = [pair[0] for pair in valid_pairs]
34
+ hypotheses = [pair[1] for pair in valid_pairs]
35
+
36
+ # Calculate WER
37
+ wer = jiwer.wer(references, hypotheses)
38
  return wer
39
 
40
  # Get WER metrics by source and split