Spaces:

Slamlab
/

asr_metrics

Sleeping

File size: 3,972 Bytes

aff6746

import gradio as gr
import numpy as np

def calculate_wer(reference, hypothesis):
    """Return the word error rate (WER) of *hypothesis* against *reference*.

    Both strings are tokenised with ``str.split()``, so any run of whitespace
    separates words. The WER is the word-level edit distance (insertions,
    deletions and substitutions, each costing 1) divided by the number of
    reference words.

    Args:
        reference: Ground-truth transcript text.
        hypothesis: Transcript text to score.

    Returns:
        The WER as a Python ``float``. When the reference contains no words
        the ratio is undefined; this function then returns ``0.0`` if the
        hypothesis is also empty (nothing to get wrong) and ``inf`` otherwise
        (every hypothesis word is an insertion against zero reference words).
    """
    reference_words = reference.split()
    hypothesis_words = hypothesis.split()

    m = len(reference_words)
    n = len(hypothesis_words)

    # Guard the division below: an empty reference would otherwise yield
    # NaN/inf together with a NumPy divide-by-zero warning.
    if m == 0:
        return 0.0 if n == 0 else float("inf")

    # Only the previous DP row is needed to compute the next one, so keep two
    # rows instead of the full (m+1) x (n+1) table.
    # previous[j] = distance between the first i-1 reference words and the
    # first j hypothesis words.
    previous = list(range(n + 1))
    for i, ref_word in enumerate(reference_words, start=1):
        current = [i]  # i deletions turn i reference words into nothing
        for j, hyp_word in enumerate(hypothesis_words, start=1):
            cost = 0 if ref_word == hyp_word else 1
            current.append(min(previous[j] + 1,          # Deletion
                               current[j - 1] + 1,       # Insertion
                               previous[j - 1] + cost))  # Substitution or match
        previous = current

    return previous[n] / m

def calculate_cer(reference, hypothesis):
    """Return the character error rate (CER) of *hypothesis* against *reference*.

    Space characters (``" "``) are removed from both strings before scoring.
    The CER is the character-level edit distance (insertions, deletions and
    substitutions, each costing 1) divided by the number of remaining
    reference characters.

    Args:
        reference: Ground-truth transcript text.
        hypothesis: Transcript text to score.

    Returns:
        The CER as a Python ``float``. When the reference has no characters
        left after removing spaces the ratio is undefined; this function then
        returns ``0.0`` if the hypothesis is also empty and ``inf`` otherwise.
    """
    # NOTE(review): only the space character is stripped; tabs and newlines
    # still count as characters. Confirm whether that is intended for
    # multi-line transcripts.
    reference = reference.replace(" ", "")
    hypothesis = hypothesis.replace(" ", "")

    m = len(reference)
    n = len(hypothesis)

    # Guard the division below: an empty reference would otherwise yield
    # NaN/inf together with a NumPy divide-by-zero warning.
    if m == 0:
        return 0.0 if n == 0 else float("inf")

    # Only the previous DP row is needed to compute the next one, so keep two
    # rows instead of the full (m+1) x (n+1) table.
    # previous[j] = distance between the first i-1 reference characters and
    # the first j hypothesis characters.
    previous = list(range(n + 1))
    for i, ref_char in enumerate(reference, start=1):
        current = [i]  # i deletions turn i reference characters into nothing
        for j, hyp_char in enumerate(hypothesis, start=1):
            cost = 0 if ref_char == hyp_char else 1
            current.append(min(previous[j] + 1,          # Deletion
                               current[j - 1] + 1,       # Insertion
                               previous[j - 1] + cost))  # Substitution or match
        previous = current

    return previous[n] / m

def calculate_mer(reference, hypothesis):
    """Placeholder for the "MER" metric; currently always returns ``0.0``.

    No computation is implemented yet: both arguments are accepted so the
    signature matches the other metric functions, but they are ignored.

    Args:
        reference: Ground-truth transcript text (unused).
        hypothesis: Transcript text to score (unused).

    Returns:
        The constant ``0.0``.
    """
    # TODO: supply the project-specific error-rate logic here; the original
    # stub described it as a custom "mission error rate".
    placeholder_score = 0.0
    return placeholder_score

def _read_uploaded_text(upload):
    """Return the full contents of an uploaded file decoded as UTF-8.

    ``upload`` may be a plain ``str`` path or an object exposing the path as
    ``.name`` (presumably a tempfile-like wrapper from Gradio; which one is
    passed depends on the Gradio version and ``gr.File`` settings).
    """
    path = upload if isinstance(upload, str) else upload.name
    # Explicit encoding so decoding does not depend on the host locale.
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()


def process_files(reference_file, hypothesis_file):
    """Compute WER, CER and MER for a pair of uploaded transcript files.

    Args:
        reference_file: Uploaded reference transcript (a ``str`` path or an
            object with a ``.name`` path attribute), or ``None``.
        hypothesis_file: Uploaded hypothesis transcript, same forms as
            ``reference_file``.

    Returns:
        ``{"WER": ..., "CER": ..., "MER": ...}`` on success, or
        ``{"error": <message>}`` if a file is missing or anything fails while
        reading or scoring. Errors are returned rather than raised so the
        caller can display them.
    """
    # Report a missing upload explicitly instead of leaking an
    # "'NoneType' object has no attribute 'name'" message to the user.
    if not reference_file or not hypothesis_file:
        return {"error": "Please upload both a reference file and a hypothesis file."}

    try:
        reference_text = _read_uploaded_text(reference_file)
        hypothesis_text = _read_uploaded_text(hypothesis_file)

        wer_value = calculate_wer(reference_text, hypothesis_text)
        cer_value = calculate_cer(reference_text, hypothesis_text)
        mer_value = calculate_mer(reference_text, hypothesis_text)

        return {
            "WER": wer_value,
            "CER": cer_value,
            "MER": mer_value
        }
    except Exception as e:
        # Deliberate catch-all at the UI boundary: the message is handed back
        # to the caller as data rather than crashing the event handler.
        return {"error": str(e)}

def main():
    """Build the ASR metrics calculator UI with Gradio Blocks and launch it.

    The page has two file uploads (reference and hypothesis), a text preview
    for each, and a button that runs ``process_files`` on the uploads and
    shows the result in a JSON component.
    """
    def read_preview(upload, limit=200):
        """Return up to ``limit`` leading characters of an upload, or ""."""
        if not upload:
            return ""
        # Accept either a plain path string or an object exposing ``.name``.
        path = upload if isinstance(upload, str) else upload.name
        # errors='replace' keeps the preview usable for files that are not
        # valid UTF-8; read(limit) avoids loading the whole file.
        with open(path, 'r', encoding='utf-8', errors='replace') as f:
            return f.read(limit)

    with gr.Blocks() as demo:
        gr.Markdown("# ASR Metrics Calculator")

        with gr.Row():
            reference_file = gr.File(label="Upload Reference File")
            hypothesis_file = gr.File(label="Upload Hypothesis File")

        with gr.Row():
            reference_preview = gr.Textbox(label="Reference Preview", lines=3)
            hypothesis_preview = gr.Textbox(label="Hypothesis Preview", lines=3)

        with gr.Row():
            compute_button = gr.Button("Compute Metrics")
            results_output = gr.JSON(label="Results")

        # Update previews when files are uploaded
        def update_previews(ref_file, hyp_file):
            # This handler feeds two output components, so it must always
            # return exactly two values, one preview per textbox.
            return read_preview(ref_file), read_preview(hyp_file)

        reference_file.change(
            fn=update_previews,
            inputs=[reference_file, hypothesis_file],
            outputs=[reference_preview, hypothesis_preview]
        )
        hypothesis_file.change(
            fn=update_previews,
            inputs=[reference_file, hypothesis_file],
            outputs=[reference_preview, hypothesis_preview]
        )

        compute_button.click(
            fn=process_files,
            inputs=[reference_file, hypothesis_file],
            outputs=results_output
        )

    demo.launch()

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()