File size: 4,305 Bytes
1550706
aff6746
 
 
820ab2f
 
 
 
 
3c4a41b
 
 
820ab2f
5ae646e
820ab2f
 
 
 
 
1550706
aff6746
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1550706
aff6746
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1550706
aff6746
 
 
 
 
 
 
 
 
 
0c9415b
aff6746
 
 
820ab2f
 
aff6746
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c155fa9
 
 
aff6746
 
c155fa9
aff6746
 
c155fa9
 
 
aff6746
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import spaces
import gradio as gr
import numpy as np

@spaces.GPU()
def get_mismatched_sentences(reference, hypothesis):
    """
    Collect the aligned token pairs on which the two texts disagree.

    Both inputs are split on whitespace and compared position by position.
    Pairing stops at the end of the shorter token list, so surplus tokens in
    the longer text are not reported.

    Returns a list of (reference_token, hypothesis_token) tuples, one for
    each position where the tokens differ.
    """
    token_pairs = zip(reference.split(), hypothesis.split())
    return [
        (ref_token, hyp_token)
        for ref_token, hyp_token in token_pairs
        if ref_token != hyp_token
    ]

@spaces.GPU()
def calculate_wer(reference, hypothesis):
    """
    Compute the word error rate (WER) of hypothesis against reference.

    Both texts are split on whitespace. The WER is the word-level
    Levenshtein distance (insertions, deletions and substitutions each cost
    1) divided by the number of reference words.

    Args:
        reference: Ground-truth text.
        hypothesis: Text to evaluate.

    Returns:
        The WER as a plain Python float. It can exceed 1.0 when the
        hypothesis contains many insertions.

    Raises:
        ValueError: If the reference contains no words, because the rate
            would require a division by zero.
    """
    reference_words = reference.split()
    hypothesis_words = hypothesis.split()

    m = len(reference_words)
    n = len(hypothesis_words)

    if m == 0:
        raise ValueError("Cannot compute WER: the reference contains no words.")

    # Only the previous row of the DP table is needed to build the next one,
    # so keep two rows instead of the full (m+1) x (n+1) table.
    previous_row = list(range(n + 1))  # distance from an empty reference prefix
    for i, ref_word in enumerate(reference_words, start=1):
        current_row = [i]  # distance to an empty hypothesis prefix
        for j, hyp_word in enumerate(hypothesis_words, start=1):
            cost = 0 if ref_word == hyp_word else 1
            current_row.append(min(previous_row[j] + 1,         # Deletion
                                   current_row[j - 1] + 1,      # Insertion
                                   previous_row[j - 1] + cost)) # Substitution or no cost
        previous_row = current_row

    return previous_row[n] / m

@spaces.GPU()
def calculate_cer(reference, hypothesis):
    """
    Compute the character error rate (CER) of hypothesis against reference.

    Space characters (" ") are removed from both texts before comparison;
    other whitespace such as newlines or tabs is kept and counted as
    characters. The CER is the character-level Levenshtein distance
    (insertions, deletions and substitutions each cost 1) divided by the
    number of remaining reference characters.

    Args:
        reference: Ground-truth text.
        hypothesis: Text to evaluate.

    Returns:
        The CER as a plain Python float. It can exceed 1.0 when the
        hypothesis contains many insertions.

    Raises:
        ValueError: If the reference has no characters left after removing
            spaces, because the rate would require a division by zero.
    """
    reference = reference.replace(" ", "")
    hypothesis = hypothesis.replace(" ", "")

    m = len(reference)
    n = len(hypothesis)

    if m == 0:
        raise ValueError("Cannot compute CER: the reference contains no characters.")

    # Only the previous row of the DP table is needed to build the next one,
    # so keep two rows instead of the full (m+1) x (n+1) table.
    previous_row = list(range(n + 1))  # distance from an empty reference prefix
    for i, ref_char in enumerate(reference, start=1):
        current_row = [i]  # distance to an empty hypothesis prefix
        for j, hyp_char in enumerate(hypothesis, start=1):
            cost = 0 if ref_char == hyp_char else 1
            current_row.append(min(previous_row[j] + 1,         # Deletion
                                   current_row[j - 1] + 1,      # Insertion
                                   previous_row[j - 1] + cost)) # Substitution or no cost
        previous_row = current_row

    return previous_row[n] / m


@spaces.GPU()
def process_files(reference_file, hypothesis_file):
    """
    Read the two uploaded files and compute WER, CER and mismatched words.

    Args:
        reference_file: Uploaded reference file; either an object exposing
            its path as `.name` or a plain path string.
        hypothesis_file: Uploaded hypothesis file, same conventions.

    Returns:
        A dict with the keys "WER", "CER" and "Mismatched Sentences" on
        success, or a dict with a single "error" key describing the problem.
        This function does not raise; failures are reported via "error".
    """
    if reference_file is None or hypothesis_file is None:
        return {"error": "Please upload both a reference file and a hypothesis file."}

    try:
        # Accept either an object carrying the path in `.name` or a bare path.
        reference_path = getattr(reference_file, "name", reference_file)
        hypothesis_path = getattr(hypothesis_file, "name", hypothesis_file)

        # Explicit encoding so decoding does not depend on the host locale.
        with open(reference_path, 'r', encoding='utf-8') as f:
            reference_text = f.read()

        with open(hypothesis_path, 'r', encoding='utf-8') as f:
            hypothesis_text = f.read()

        # Both rates divide by the reference length, so an empty reference
        # has no defined score; report that instead of a NaN/inf value.
        if not reference_text.split():
            return {"error": "The reference file contains no words; WER and CER are undefined."}

        wer_value = calculate_wer(reference_text, hypothesis_text)
        cer_value = calculate_cer(reference_text, hypothesis_text)
        mismatched_sentences = get_mismatched_sentences(reference_text, hypothesis_text)

        return {
            # Plain floats keep the result JSON-friendly whatever numeric
            # type the metric helpers return.
            "WER": float(wer_value),
            "CER": float(cer_value),
            "Mismatched Sentences": mismatched_sentences
        }
    except Exception as e:
        # UI boundary: surface any failure to the user instead of crashing.
        return {"error": str(e)}

def main():
    """
    Build the Gradio interface and launch it.

    The page offers two file uploads (reference and hypothesis), a short text
    preview of each, and a button that runs `process_files` and shows its
    result as JSON.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# ASR Metrics Calculator")

        with gr.Row():
            reference_file = gr.File(label="Upload Reference File")
            hypothesis_file = gr.File(label="Upload Hypothesis File")

        with gr.Row():
            reference_preview = gr.Textbox(label="Reference Preview", lines=3)
            hypothesis_preview = gr.Textbox(label="Hypothesis Preview", lines=3)

        with gr.Row():
            compute_button = gr.Button("Compute Metrics")
            results_output = gr.JSON(label="Results")

        # Update previews when files are uploaded
        def update_previews(ref_file, hyp_file):
            """Return the first 200 characters of each file ("" if not uploaded)."""

            def read_head(uploaded):
                if not uploaded:
                    return ""
                # Accept either an object carrying the path in `.name` or a bare path.
                path = getattr(uploaded, "name", uploaded)
                # Read only what is displayed, and never let undecodable
                # bytes break a mere preview.
                with open(path, 'r', encoding='utf-8', errors='replace') as f:
                    return f.read(200)  # Show first 200 characters

            return read_head(ref_file), read_head(hyp_file)

        # Either upload refreshes both previews, since the callback returns both.
        reference_file.change(
            fn=update_previews,
            inputs=[reference_file, hypothesis_file],
            outputs=[reference_preview, hypothesis_preview]
        )
        hypothesis_file.change(
            fn=update_previews,
            inputs=[reference_file, hypothesis_file],
            outputs=[reference_preview, hypothesis_preview]
        )

        compute_button.click(
            fn=process_files,
            inputs=[reference_file, hypothesis_file],
            outputs=results_output
        )

    demo.launch()

# Build and launch the app only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()