File size: 3,972 Bytes
aff6746
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import gradio as gr
import numpy as np

def calculate_wer(reference, hypothesis):
    """Return the word error rate (WER) of *hypothesis* against *reference*.

    Both strings are tokenised with ``str.split()``, so any run of
    whitespace separates words. The WER is the word-level Levenshtein
    distance (insertion, deletion and substitution each cost 1) divided by
    the number of reference words; it can exceed 1.0 when the hypothesis
    contains many extra words.

    Args:
        reference: Ground-truth transcript.
        hypothesis: Transcript to evaluate.

    Returns:
        The WER as a plain Python ``float``.

    Raises:
        ValueError: If *reference* contains no words, because the
            denominator of the rate would be zero.
    """
    reference_words = reference.split()
    hypothesis_words = hypothesis.split()

    if not reference_words:
        raise ValueError("Cannot compute WER: the reference contains no words.")

    # Each DP row depends only on the row above it, so two rolling rows are
    # enough; this keeps memory at O(len(hypothesis_words)).
    previous = list(range(len(hypothesis_words) + 1))
    for i, ref_word in enumerate(reference_words, start=1):
        current = [i]  # Column 0: delete all i reference words seen so far.
        for j, hyp_word in enumerate(hypothesis_words, start=1):
            cost = 0 if ref_word == hyp_word else 1
            current.append(min(
                previous[j] + 1,         # Deletion
                current[j - 1] + 1,      # Insertion
                previous[j - 1] + cost,  # Substitution or match
            ))
        previous = current

    return previous[-1] / len(reference_words)

def calculate_cer(reference, hypothesis):
    """Return the character error rate (CER) of *hypothesis* against *reference*.

    All whitespace (spaces, tabs, newlines) is removed from both strings
    before comparison, so line breaks or a trailing newline in an input do
    not count as character errors. The CER is the character-level
    Levenshtein distance (insertion, deletion and substitution each cost 1)
    divided by the number of remaining reference characters; it can exceed
    1.0.

    Args:
        reference: Ground-truth transcript.
        hypothesis: Transcript to evaluate.

    Returns:
        The CER as a plain Python ``float``.

    Raises:
        ValueError: If *reference* has no non-whitespace characters, because
            the denominator of the rate would be zero.
    """
    # split() with no argument breaks on any whitespace run, so joining the
    # pieces drops every whitespace character, not only " ".
    reference_chars = "".join(reference.split())
    hypothesis_chars = "".join(hypothesis.split())

    if not reference_chars:
        raise ValueError(
            "Cannot compute CER: the reference contains no characters."
        )

    # Each DP row depends only on the row above it, so two rolling rows are
    # enough; this keeps memory at O(len(hypothesis_chars)).
    previous = list(range(len(hypothesis_chars) + 1))
    for i, ref_char in enumerate(reference_chars, start=1):
        current = [i]  # Column 0: delete all i reference characters so far.
        for j, hyp_char in enumerate(hypothesis_chars, start=1):
            cost = 0 if ref_char == hyp_char else 1
            current.append(min(
                previous[j] + 1,         # Deletion
                current[j - 1] + 1,      # Insertion
                previous[j - 1] + cost,  # Substitution or match
            ))
        previous = current

    return previous[-1] / len(reference_chars)

def calculate_mer(reference, hypothesis):
    """Placeholder for a custom "MER" error-rate metric.

    This is a stub: both arguments are ignored and the result is always
    ``0.0``.

    Args:
        reference: Ground-truth transcript (currently unused).
        hypothesis: Transcript to evaluate (currently unused).

    Returns:
        Always ``0.0``.
    """
    # TODO: replace the constant below with the project-specific calculation.
    placeholder_rate = 0.0
    return placeholder_rate

def process_files(reference_file, hypothesis_file):
    """Read the two uploaded transcripts and compute WER, CER and MER.

    Each argument may be either a filesystem path string or an object
    exposing the path through a ``.name`` attribute (such as a temporary-file
    wrapper), so both ways a file-upload widget can deliver a file are
    accepted.

    Args:
        reference_file: Uploaded ground-truth transcript, or a falsy value
            if nothing was uploaded.
        hypothesis_file: Uploaded transcript to evaluate, or a falsy value
            if nothing was uploaded.

    Returns:
        ``{"WER": ..., "CER": ..., "MER": ...}`` on success, or
        ``{"error": message}`` if an upload is missing or anything fails
        while reading the files or computing the metrics.
    """
    # Report a missing upload explicitly instead of surfacing an opaque
    # attribute error from the code below.
    if not reference_file or not hypothesis_file:
        return {
            "error": "Both a reference file and a hypothesis file are required."
        }

    try:
        texts = []
        for uploaded in (reference_file, hypothesis_file):
            # Plain path strings have no ``.name``; fall back to the value.
            path = getattr(uploaded, "name", uploaded)
            # Decode as UTF-8 explicitly so results do not depend on the
            # platform's locale encoding.
            with open(path, "r", encoding="utf-8") as f:
                texts.append(f.read())
        reference_text, hypothesis_text = texts

        # Coerce to built-in floats so the result is plain JSON-friendly data.
        return {
            "WER": float(calculate_wer(reference_text, hypothesis_text)),
            "CER": float(calculate_cer(reference_text, hypothesis_text)),
            "MER": float(calculate_mer(reference_text, hypothesis_text)),
        }
    except Exception as e:
        # UI boundary: show the failure in the results panel rather than
        # letting the callback crash.
        return {"error": str(e)}

def main():
    """Build the ASR metrics Gradio interface and launch it.

    The UI has two file uploads (reference and hypothesis), a short text
    preview for each, and a button that runs ``process_files`` and shows the
    resulting metrics as JSON.
    """
    preview_chars = 200  # Maximum number of characters shown per preview.

    def read_preview(uploaded):
        """Return the first ``preview_chars`` characters of an upload, or ""."""
        if not uploaded:
            return ""
        # Accept either a path string or an object exposing ``.name``.
        path = getattr(uploaded, "name", uploaded)
        # errors="replace" keeps the preview usable for files that are not
        # valid UTF-8; only the needed prefix is read, not the whole file.
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            return f.read(preview_chars)

    with gr.Blocks() as demo:
        gr.Markdown("# ASR Metrics Calculator")

        with gr.Row():
            reference_file = gr.File(label="Upload Reference File")
            hypothesis_file = gr.File(label="Upload Hypothesis File")

        with gr.Row():
            reference_preview = gr.Textbox(label="Reference Preview", lines=3)
            hypothesis_preview = gr.Textbox(label="Hypothesis Preview", lines=3)

        with gr.Row():
            compute_button = gr.Button("Compute Metrics")
            results_output = gr.JSON(label="Results")

        # Update previews when files are uploaded
        def update_previews(ref_file, hyp_file):
            # Two output components are wired below, so always return one
            # value per component, each computed independently.
            return read_preview(ref_file), read_preview(hyp_file)

        reference_file.change(
            fn=update_previews,
            inputs=[reference_file, hypothesis_file],
            outputs=[reference_preview, hypothesis_preview]
        )
        hypothesis_file.change(
            fn=update_previews,
            inputs=[reference_file, hypothesis_file],
            outputs=[reference_preview, hypothesis_preview]
        )

        compute_button.click(
            fn=process_files,
            inputs=[reference_file, hypothesis_file],
            outputs=results_output
        )

    demo.launch()


if __name__ == "__main__":
    main()