Spaces:
Sleeping
Sleeping
File size: 4,305 Bytes
1550706 aff6746 820ab2f 3c4a41b 820ab2f 5ae646e 820ab2f 1550706 aff6746 1550706 aff6746 1550706 aff6746 0c9415b aff6746 820ab2f aff6746 c155fa9 aff6746 c155fa9 aff6746 c155fa9 aff6746 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
import spaces
import gradio as gr
import numpy as np
@spaces.GPU()
def get_mismatched_sentences(reference, hypothesis):
    """
    Return the positional token mismatches between reference and hypothesis.

    Both texts are split on whitespace and compared token by token at the
    same index. No alignment is performed, so a single inserted or deleted
    token shifts every later comparison.

    Args:
        reference: Ground-truth transcript text.
        hypothesis: Transcript text to compare against the reference.

    Returns:
        A list of ``(reference_token, hypothesis_token)`` tuples, one per
        position where the tokens differ. When one text has more tokens
        than the other, the missing side is reported as ``""``.
    """
    # Function-scope import so this block does not depend on file-level edits.
    from itertools import zip_longest

    ref_tokens = reference.split()
    hyp_tokens = hypothesis.split()

    # Plain zip() stops at the shorter text and would silently hide trailing
    # tokens. str.split() never yields "", so "" is an unambiguous filler.
    return [
        (ref_token, hyp_token)
        for ref_token, hyp_token in zip_longest(ref_tokens, hyp_tokens, fillvalue="")
        if ref_token != hyp_token
    ]
def _edit_distance(reference, hypothesis):
    """
    Return the Levenshtein distance between two sequences.

    Insertions, deletions and substitutions each cost 1. Works on any
    indexable sequences whose items support ``==`` (lists of words, strings).

    Only two rows of the dynamic-programming table are kept at a time, so
    memory is O(len(hypothesis)) instead of O(len(reference) * len(hypothesis)).
    """
    # Row 0: turning an empty reference prefix into hypothesis[:j] takes j insertions.
    previous_row = list(range(len(hypothesis) + 1))

    for i, ref_item in enumerate(reference, start=1):
        # Column 0: turning reference[:i] into an empty hypothesis takes i deletions.
        current_row = [i]
        for j, hyp_item in enumerate(hypothesis, start=1):
            cost = 0 if ref_item == hyp_item else 1
            current_row.append(
                min(
                    previous_row[j] + 1,         # Deletion
                    current_row[j - 1] + 1,      # Insertion
                    previous_row[j - 1] + cost,  # Substitution or match
                )
            )
        previous_row = current_row

    return previous_row[-1]


@spaces.GPU()
def calculate_wer(reference, hypothesis):
    """
    Compute the word error rate (WER) of hypothesis against reference.

    Both texts are split on whitespace; WER is the word-level edit distance
    divided by the number of reference words.

    Args:
        reference: Ground-truth transcript text.
        hypothesis: Transcript text to score.

    Returns:
        The WER as a float (can exceed 1.0 when the hypothesis is much
        longer than the reference).

    Raises:
        ValueError: If the reference contains no words, since the rate
            would require dividing by zero.
    """
    reference_words = reference.split()
    if not reference_words:
        raise ValueError("Reference text contains no words; WER is undefined.")
    hypothesis_words = hypothesis.split()

    return _edit_distance(reference_words, hypothesis_words) / len(reference_words)


@spaces.GPU()
def calculate_cer(reference, hypothesis):
    """
    Compute the character error rate (CER) of hypothesis against reference.

    Space characters (``" "``) are removed from both texts before comparing;
    other whitespace such as newlines is kept and counted as characters.
    CER is the character-level edit distance divided by the number of
    remaining reference characters.

    Args:
        reference: Ground-truth transcript text.
        hypothesis: Transcript text to score.

    Returns:
        The CER as a float (can exceed 1.0 when the hypothesis is much
        longer than the reference).

    Raises:
        ValueError: If the reference has no characters left after removing
            spaces, since the rate would require dividing by zero.
    """
    reference_chars = reference.replace(" ", "")
    if not reference_chars:
        raise ValueError("Reference text contains no characters; CER is undefined.")
    hypothesis_chars = hypothesis.replace(" ", "")

    return _edit_distance(reference_chars, hypothesis_chars) / len(reference_chars)
@spaces.GPU()
def process_files(reference_file, hypothesis_file):
    """
    Read the two uploaded transcripts and compute their ASR metrics.

    Args:
        reference_file: Uploaded reference transcript. Either an object
            exposing the file path as ``.name`` or a plain path string.
        hypothesis_file: Uploaded hypothesis transcript, same forms as above.

    Returns:
        On success, a dict with the keys ``"WER"``, ``"CER"`` and
        ``"Mismatched Sentences"``. On any failure (missing upload,
        unreadable file, metric error), a dict ``{"error": <message>}``
        so the UI can display the problem instead of crashing.
    """
    try:
        texts = []
        for label, upload in (
            ("reference", reference_file),
            ("hypothesis", hypothesis_file),
        ):
            if not upload:
                raise ValueError(f"No {label} file was uploaded.")
            # Accept both file-like upload objects (path in .name) and path strings.
            path = getattr(upload, "name", upload)
            # Decode explicitly rather than with the platform default;
            # "utf-8-sig" also strips a leading BOM so it does not stick to
            # the first word.
            with open(path, "r", encoding="utf-8-sig") as f:
                texts.append(f.read())
        reference_text, hypothesis_text = texts

        wer_value = calculate_wer(reference_text, hypothesis_text)
        cer_value = calculate_cer(reference_text, hypothesis_text)
        mismatched_sentences = get_mismatched_sentences(reference_text, hypothesis_text)
        return {
            "WER": wer_value,
            "CER": cer_value,
            "Mismatched Sentences": mismatched_sentences,
        }
    except Exception as e:
        # UI boundary: report the failure in the results panel.
        return {"error": str(e)}
def main():
    """
    Build the Gradio interface for the ASR metrics calculator and launch it.

    The UI has two file uploads (reference and hypothesis), a short text
    preview of each, and a button that runs ``process_files`` and shows the
    result as JSON.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# ASR Metrics Calculator")

        with gr.Row():
            reference_file = gr.File(label="Upload Reference File")
            hypothesis_file = gr.File(label="Upload Hypothesis File")

        with gr.Row():
            reference_preview = gr.Textbox(label="Reference Preview", lines=3)
            hypothesis_preview = gr.Textbox(label="Hypothesis Preview", lines=3)

        with gr.Row():
            compute_button = gr.Button("Compute Metrics")
            results_output = gr.JSON(label="Results")

        def update_previews(ref_file, hyp_file):
            """Return the first 200 characters of each uploaded file ("" if none)."""

            def read_preview(upload):
                if not upload:
                    return ""
                # Accept both file-like upload objects (path in .name) and path strings.
                path = getattr(upload, "name", upload)
                # Read only the preview length instead of the whole file, and
                # never let one undecodable byte break the preview.
                with open(path, "r", encoding="utf-8", errors="replace") as f:
                    return f.read(200)

            return read_preview(ref_file), read_preview(hyp_file)

        # Refresh both previews whenever either upload changes.
        for file_input in (reference_file, hypothesis_file):
            file_input.change(
                fn=update_previews,
                inputs=[reference_file, hypothesis_file],
                outputs=[reference_preview, hypothesis_preview],
            )

        compute_button.click(
            fn=process_files,
            inputs=[reference_file, hypothesis_file],
            outputs=results_output,
        )

    demo.launch()
# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()
|