File size: 3,780 Bytes
1550706
aff6746
 
 
1550706
aff6746
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1550706
aff6746
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1550706
aff6746
 
 
 
 
 
 
 
 
 
 
 
 
d1bf32a
aff6746
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c155fa9
 
 
aff6746
 
c155fa9
aff6746
 
c155fa9
 
 
aff6746
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import spaces
import gradio as gr
import numpy as np

@spaces.GPU()
def calculate_wer(reference, hypothesis):
    """Return the word error rate (WER) of *hypothesis* against *reference*.

    Both strings are split on arbitrary whitespace. The WER is the word-level
    Levenshtein distance (substitutions + deletions + insertions) divided by
    the number of reference words, so it can exceed 1.0 when the hypothesis
    contains many extra words.

    Args:
        reference: Ground-truth transcript.
        hypothesis: Transcript to evaluate.

    Returns:
        The WER as a plain Python float.

    Raises:
        ValueError: If the reference contains no words; the rate would
            require dividing by zero and is undefined.
    """
    reference_words = reference.split()
    hypothesis_words = hypothesis.split()

    if not reference_words:
        raise ValueError("Reference text contains no words; WER is undefined.")

    # Only the previous row of the edit-distance table is ever read, so keep
    # two rows of plain ints instead of a full (m+1) x (n+1) matrix.
    # previous[j] is the distance between the reference words consumed so far
    # and the first j hypothesis words.
    previous = list(range(len(hypothesis_words) + 1))
    for i, ref_word in enumerate(reference_words, start=1):
        current = [i]  # i deletions turn i reference words into nothing
        for j, hyp_word in enumerate(hypothesis_words, start=1):
            cost = 0 if ref_word == hyp_word else 1
            current.append(min(previous[j] + 1,          # Deletion
                               current[j - 1] + 1,       # Insertion
                               previous[j - 1] + cost))  # Substitution or match
        previous = current

    return previous[-1] / len(reference_words)

@spaces.GPU()
def calculate_cer(reference, hypothesis):
    """Return the character error rate (CER) of *hypothesis* against *reference*.

    All whitespace (spaces, tabs, newlines) is removed from both strings
    before comparison, so line breaks or trailing newlines in the input do
    not count as character errors. The CER is the character-level
    Levenshtein distance divided by the number of remaining reference
    characters, so it can exceed 1.0.

    Args:
        reference: Ground-truth transcript.
        hypothesis: Transcript to evaluate.

    Returns:
        The CER as a plain Python float.

    Raises:
        ValueError: If the reference has no non-whitespace characters; the
            rate would require dividing by zero and is undefined.
    """
    # split() + join drops every kind of whitespace, not just the literal
    # space character, matching how word splitting treats whitespace.
    reference = "".join(reference.split())
    hypothesis = "".join(hypothesis.split())

    if not reference:
        raise ValueError(
            "Reference text contains no characters; CER is undefined."
        )

    # Only the previous row of the edit-distance table is ever read, so keep
    # two rows of plain ints instead of a full (m+1) x (n+1) matrix.
    previous = list(range(len(hypothesis) + 1))
    for i, ref_char in enumerate(reference, start=1):
        current = [i]  # i deletions turn i reference characters into nothing
        for j, hyp_char in enumerate(hypothesis, start=1):
            cost = 0 if ref_char == hyp_char else 1
            current.append(min(previous[j] + 1,          # Deletion
                               current[j - 1] + 1,       # Insertion
                               previous[j - 1] + cost))  # Substitution or match
        previous = current

    return previous[-1] / len(reference)


@spaces.GPU()
def process_files(reference_file, hypothesis_file):
    """Read the two uploaded transcripts and compute their WER and CER.

    Args:
        reference_file: Uploaded reference transcript. Either a path string
            or an object whose ``name`` attribute is the path on disk.
        hypothesis_file: Uploaded hypothesis transcript, same form as above.

    Returns:
        ``{"WER": float, "CER": float}`` on success, or ``{"error": str}``
        when an upload is missing or reading/scoring fails. Failures are
        reported in the result rather than raised so the caller can display
        them.
    """
    if reference_file is None or hypothesis_file is None:
        return {"error": "Both a reference file and a hypothesis file are required."}

    try:
        texts = []
        for uploaded in (reference_file, hypothesis_file):
            path = uploaded if isinstance(uploaded, str) else uploaded.name
            # Decode explicitly instead of relying on the platform default;
            # "utf-8-sig" also drops a leading BOM that would otherwise be
            # glued onto the first word.
            with open(path, 'r', encoding="utf-8-sig") as f:
                texts.append(f.read())
        reference_text, hypothesis_text = texts

        wer_value = calculate_wer(reference_text, hypothesis_text)
        cer_value = calculate_cer(reference_text, hypothesis_text)

        return {
            "WER": float(wer_value),
            "CER": float(cer_value)
        }
    except Exception as e:
        # UI boundary: surface any failure as a message in the JSON output.
        return {"error": str(e)}

def main():
    """Build the ASR metrics Gradio interface and launch it.

    The page has two file uploads (reference and hypothesis), a short text
    preview of each, and a button that runs ``process_files`` and shows the
    resulting dictionary as JSON.
    """
    def read_preview(uploaded, limit=200):
        """Return the first *limit* characters of an uploaded file, or ""."""
        if not uploaded:
            return ""
        path = uploaded if isinstance(uploaded, str) else uploaded.name
        # The preview is best-effort: decode explicitly, replace undecodable
        # bytes instead of failing, and read only the characters displayed.
        with open(path, 'r', encoding="utf-8-sig", errors="replace") as f:
            return f.read(limit)

    with gr.Blocks() as demo:
        gr.Markdown("# ASR Metrics Calculator")

        with gr.Row():
            reference_file = gr.File(label="Upload Reference File")
            hypothesis_file = gr.File(label="Upload Hypothesis File")

        with gr.Row():
            reference_preview = gr.Textbox(label="Reference Preview", lines=3)
            hypothesis_preview = gr.Textbox(label="Hypothesis Preview", lines=3)

        with gr.Row():
            compute_button = gr.Button("Compute Metrics")
            results_output = gr.JSON(label="Results")

        # Update previews when files are uploaded
        def update_previews(ref_file, hyp_file):
            return read_preview(ref_file), read_preview(hyp_file)

        # A change to either upload refreshes both previews.
        for file_input in (reference_file, hypothesis_file):
            file_input.change(
                fn=update_previews,
                inputs=[reference_file, hypothesis_file],
                outputs=[reference_preview, hypothesis_preview]
            )

        compute_button.click(
            fn=process_files,
            inputs=[reference_file, hypothesis_file],
            outputs=results_output
        )

    demo.launch()

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()