"""Gradio app that computes ASR error metrics between two uploaded text files.

The metric functions (`calculate_wer`, `calculate_cer`, `calculate_mer`) are
plain Python and can be imported without Gradio; Gradio is only imported when
`main()` builds and launches the UI.
"""

import numpy as np  # noqa: F401 -- existing import retained; the metrics below no longer use it

# Number of characters shown in each preview textbox.
PREVIEW_CHARS = 200


def _edit_distance(reference_items, hypothesis_items):
    """Return the Levenshtein distance between two indexable sequences.

    Insertions, deletions and substitutions each cost 1. Only two rows of the
    dynamic-programming table are kept, so memory grows with the hypothesis
    length instead of with the product of both lengths.
    """
    # Row 0: turning an empty reference into the first j hypothesis items
    # takes j insertions.
    previous = list(range(len(hypothesis_items) + 1))
    for i, ref_item in enumerate(reference_items, start=1):
        # Column 0: turning i reference items into nothing takes i deletions.
        current = [i]
        for j, hyp_item in enumerate(hypothesis_items, start=1):
            cost = 0 if ref_item == hyp_item else 1
            current.append(
                min(
                    previous[j] + 1,         # deletion
                    current[j - 1] + 1,      # insertion
                    previous[j - 1] + cost,  # substitution or match
                )
            )
        previous = current
    return previous[-1]


def _error_rate(reference_items, hypothesis_items, unit):
    """Return edit distance divided by the reference length.

    Raises:
        ValueError: if the reference is empty, because the rate would
            require dividing by zero.
    """
    if not reference_items:
        raise ValueError(
            f"Reference contains no {unit}; the error rate is undefined."
        )
    return _edit_distance(reference_items, hypothesis_items) / len(reference_items)


def calculate_wer(reference, hypothesis):
    """Return the word error rate of `hypothesis` against `reference`.

    Both strings are split on whitespace; the result is the word-level edit
    distance divided by the number of reference words.

    Args:
        reference: Ground-truth transcript.
        hypothesis: Transcript to evaluate.

    Returns:
        The error rate as a float (can exceed 1.0 when the hypothesis has
        many extra words).

    Raises:
        ValueError: if `reference` contains no words.
    """
    return _error_rate(reference.split(), hypothesis.split(), "words")


def calculate_cer(reference, hypothesis):
    """Return the character error rate of `hypothesis` against `reference`.

    Space characters (" ") are removed from both strings before comparing;
    other whitespace such as newlines is kept and counted as characters.

    Args:
        reference: Ground-truth transcript.
        hypothesis: Transcript to evaluate.

    Returns:
        The character-level edit distance divided by the number of
        reference characters, as a float.

    Raises:
        ValueError: if `reference` has no characters left after removing
            spaces.
    """
    return _error_rate(
        reference.replace(" ", ""),
        hypothesis.replace(" ", ""),
        "characters",
    )


def calculate_mer(reference, hypothesis):
    """Placeholder for a custom error-rate metric; currently always 0.0.

    Replace the body with the project-specific calculation.
    """
    return 0.0


def _read_text(uploaded_file, max_chars=-1):
    """Read UTF-8 text from an uploaded file.

    Args:
        uploaded_file: Either an object exposing the path as `.name` or the
            path itself.
        max_chars: Maximum number of characters to read; -1 reads everything.

    Raises:
        ValueError: if no file was provided.
    """
    if not uploaded_file:
        raise ValueError("No file was provided.")
    # Accept both file-like wrappers (path in `.name`) and plain path strings.
    path = getattr(uploaded_file, "name", uploaded_file)
    with open(path, "r", encoding="utf-8") as handle:
        return handle.read(max_chars)


def process_files(reference_file, hypothesis_file):
    """Compute WER, CER and MER for two uploaded files.

    Returns:
        A dict with keys "WER", "CER" and "MER" on success, or
        {"error": <message>} if reading or computing fails.
    """
    # This is the UI boundary: any failure is reported in the results dict
    # instead of propagating.
    try:
        reference_text = _read_text(reference_file)
        hypothesis_text = _read_text(hypothesis_file)
        return {
            "WER": calculate_wer(reference_text, hypothesis_text),
            "CER": calculate_cer(reference_text, hypothesis_text),
            "MER": calculate_mer(reference_text, hypothesis_text),
        }
    except Exception as e:
        return {"error": str(e)}


def update_previews(ref_file, hyp_file):
    """Return (reference_preview, hypothesis_preview) for the two uploads.

    Each preview is the first `PREVIEW_CHARS` characters of the file, or an
    empty string when that file has not been uploaded. Two values are always
    returned, one per preview textbox.
    """
    ref_preview = _read_text(ref_file, PREVIEW_CHARS) if ref_file else ""
    hyp_preview = _read_text(hyp_file, PREVIEW_CHARS) if hyp_file else ""
    return ref_preview, hyp_preview


def main():
    """Build the Gradio interface and launch it."""
    # Imported here so the metric functions above stay importable (and
    # testable) on machines without Gradio installed.
    import gradio as gr

    with gr.Blocks() as demo:
        gr.Markdown("# ASR Metrics Calculator")

        with gr.Row():
            reference_file = gr.File(label="Upload Reference File")
            hypothesis_file = gr.File(label="Upload Hypothesis File")

        with gr.Row():
            reference_preview = gr.Textbox(label="Reference Preview", lines=3)
            hypothesis_preview = gr.Textbox(label="Hypothesis Preview", lines=3)

        with gr.Row():
            compute_button = gr.Button("Compute Metrics")
            results_output = gr.JSON(label="Results")

        # Refresh both previews whenever either upload changes.
        for file_input in (reference_file, hypothesis_file):
            file_input.change(
                fn=update_previews,
                inputs=[reference_file, hypothesis_file],
                outputs=[reference_preview, hypothesis_preview],
            )

        compute_button.click(
            fn=process_files,
            inputs=[reference_file, hypothesis_file],
            outputs=results_output,
        )

    demo.launch()


if __name__ == "__main__":
    main()