Spaces:
Sleeping
Sleeping
File size: 3,972 Bytes
aff6746 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import gradio as gr
import numpy as np
def calculate_wer(reference, hypothesis):
    """Return the word error rate (WER) of ``hypothesis`` against ``reference``.

    Both texts are split on runs of whitespace and the word-level Levenshtein
    distance (insertions, deletions and substitutions all cost 1) is divided
    by the number of reference words.

    Args:
        reference: Ground-truth transcript.
        hypothesis: Transcript to evaluate.

    Returns:
        The WER as a float. It can exceed 1.0 when the hypothesis contains
        many insertions. When the reference has no words the rate is
        undefined; ``0.0`` is returned if the hypothesis is empty as well
        (nothing to get wrong) and ``float("inf")`` otherwise.
    """
    reference_words = reference.split()
    hypothesis_words = hypothesis.split()
    m = len(reference_words)
    n = len(hypothesis_words)

    # Guard the division below: an empty reference has no defined error rate.
    if m == 0:
        return 0.0 if n == 0 else float("inf")

    # Rolling-row Levenshtein: only the previous DP row is needed to compute
    # the current one, so the full (m+1) x (n+1) table is never materialised.
    previous = list(range(n + 1))  # distance from an empty reference prefix
    for i, ref_word in enumerate(reference_words, start=1):
        current = [i]  # distance to an empty hypothesis prefix
        for j, hyp_word in enumerate(hypothesis_words, start=1):
            cost = 0 if ref_word == hyp_word else 1
            current.append(
                min(
                    previous[j] + 1,         # deletion
                    current[j - 1] + 1,      # insertion
                    previous[j - 1] + cost,  # substitution or match
                )
            )
        previous = current

    return previous[n] / m
def calculate_cer(reference, hypothesis):
    """Return the character error rate (CER) of ``hypothesis`` against ``reference``.

    All whitespace (spaces, tabs, newlines) is removed from both texts before
    comparison, so differences in spacing or a trailing newline in one file
    are not counted as character errors. The character-level Levenshtein
    distance (insertions, deletions and substitutions all cost 1) is then
    divided by the number of remaining reference characters.

    Args:
        reference: Ground-truth transcript.
        hypothesis: Transcript to evaluate.

    Returns:
        The CER as a float. It can exceed 1.0 when the hypothesis contains
        many insertions. When the reference has no non-whitespace characters
        the rate is undefined; ``0.0`` is returned if the hypothesis is empty
        as well and ``float("inf")`` otherwise.
    """
    # str.split() with no arguments splits on any run of whitespace, so
    # re-joining the pieces drops every whitespace character.
    reference = "".join(reference.split())
    hypothesis = "".join(hypothesis.split())
    m = len(reference)
    n = len(hypothesis)

    # Guard the division below: an empty reference has no defined error rate.
    if m == 0:
        return 0.0 if n == 0 else float("inf")

    # Rolling-row Levenshtein: only the previous DP row is needed to compute
    # the current one, so the full (m+1) x (n+1) table is never materialised.
    previous = list(range(n + 1))  # distance from an empty reference prefix
    for i, ref_char in enumerate(reference, start=1):
        current = [i]  # distance to an empty hypothesis prefix
        for j, hyp_char in enumerate(hypothesis, start=1):
            cost = 0 if ref_char == hyp_char else 1
            current.append(
                min(
                    previous[j] + 1,         # deletion
                    current[j - 1] + 1,      # insertion
                    previous[j - 1] + cost,  # substitution or match
                )
            )
        previous = current

    return previous[n] / m
def calculate_mer(reference, hypothesis):
    """Placeholder for the custom "mission error rate" (MER) metric.

    The real computation has not been written yet: both arguments are
    ignored and the result is always ``0.0``. Replace the body with the
    project-specific logic when it is available.

    Args:
        reference: Ground-truth transcript (currently unused).
        hypothesis: Transcript to evaluate (currently unused).

    Returns:
        Always ``0.0``.
    """
    placeholder_score = 0.0
    return placeholder_score
def process_files(reference_file, hypothesis_file):
    """Read two uploaded transcripts and compute WER, CER and MER for them.

    Args:
        reference_file: Uploaded ground-truth transcript. Either an object
            exposing the file path as ``.name`` or a plain path string.
        hypothesis_file: Uploaded transcript to evaluate, same forms as
            ``reference_file``.

    Returns:
        ``{"WER": ..., "CER": ..., "MER": ...}`` on success, or
        ``{"error": <message>}`` when an upload is missing or anything goes
        wrong while reading the files or computing the metrics. The function
        never raises, so the result can always be shown in the UI.
    """
    # Give a readable message instead of the AttributeError text that a
    # missing upload would otherwise produce.
    if reference_file is None or hypothesis_file is None:
        return {"error": "Please upload both a reference file and a hypothesis file."}

    try:
        # Accept both file-like objects carrying a ``.name`` path and plain
        # path strings.
        reference_path = getattr(reference_file, "name", reference_file)
        hypothesis_path = getattr(hypothesis_file, "name", hypothesis_file)

        # Decode explicitly as UTF-8 so results do not depend on the
        # platform's default encoding.
        with open(reference_path, "r", encoding="utf-8") as f:
            reference_text = f.read()
        with open(hypothesis_path, "r", encoding="utf-8") as f:
            hypothesis_text = f.read()

        wer_value = calculate_wer(reference_text, hypothesis_text)
        cer_value = calculate_cer(reference_text, hypothesis_text)
        mer_value = calculate_mer(reference_text, hypothesis_text)
        return {
            "WER": wer_value,
            "CER": cer_value,
            "MER": mer_value,
        }
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and any failure is
        # reported to the user as data rather than crashing the callback.
        return {"error": str(e)}
def main():
    """Build the ASR metrics calculator UI and launch it.

    The page offers two file uploads (reference and hypothesis), a short
    text preview of each, and a button that runs ``process_files`` on the
    uploads and shows the resulting metrics as JSON.
    """
    preview_chars = 200  # number of characters shown in each preview box

    def read_preview(uploaded):
        """Return the first ``preview_chars`` characters of an upload, or "" if absent."""
        if not uploaded:
            return ""
        # Accept both objects carrying a ``.name`` path and plain path strings.
        path = getattr(uploaded, "name", uploaded)
        # errors="replace" keeps the preview working for files that are not
        # valid UTF-8; read(n) avoids loading the whole file for a preview.
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            return f.read(preview_chars)

    def update_previews(ref_file, hyp_file):
        """Return one preview string per preview textbox, in output order."""
        # Two output components are wired to this callback, so exactly two
        # values must be returned regardless of which files are present.
        return read_preview(ref_file), read_preview(hyp_file)

    with gr.Blocks() as demo:
        gr.Markdown("# ASR Metrics Calculator")
        with gr.Row():
            reference_file = gr.File(label="Upload Reference File")
            hypothesis_file = gr.File(label="Upload Hypothesis File")
        with gr.Row():
            reference_preview = gr.Textbox(label="Reference Preview", lines=3)
            hypothesis_preview = gr.Textbox(label="Hypothesis Preview", lines=3)
        with gr.Row():
            compute_button = gr.Button("Compute Metrics")
            results_output = gr.JSON(label="Results")

        # Refresh both previews whenever either upload changes.
        for file_input in (reference_file, hypothesis_file):
            file_input.change(
                fn=update_previews,
                inputs=[reference_file, hypothesis_file],
                outputs=[reference_preview, hypothesis_preview],
            )

        compute_button.click(
            fn=process_files,
            inputs=[reference_file, hypothesis_file],
            outputs=results_output,
        )

    demo.launch()
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()
|