Yehor committed
Commit a9252f6 · 1 Parent(s): 8a89db0
.dockerignore ADDED
@@ -0,0 +1,2 @@
+ .ruff_cache/
+ .venv/
.gitignore ADDED
@@ -0,0 +1,5 @@
+ .idea/
+ .venv/
+ .ruff_cache/
+
+ flagged/
Dockerfile ADDED
@@ -0,0 +1,56 @@
+ FROM python:3.13.2-bookworm
+
+ ENV DEBIAN_FRONTEND=noninteractive
+
+ RUN apt-get update && \
+     apt-get upgrade -y && \
+     apt-get install -y --no-install-recommends \
+     git \
+     git-lfs \
+     wget \
+     curl \
+     ca-certificates \
+     # python build dependencies \
+     build-essential \
+     libssl-dev \
+     zlib1g-dev \
+     libbz2-dev \
+     libreadline-dev \
+     libsqlite3-dev \
+     libncursesw5-dev \
+     xz-utils \
+     tk-dev \
+     libxml2-dev \
+     libxmlsec1-dev \
+     libffi-dev \
+     liblzma-dev \
+     # gradio dependencies \
+     ffmpeg \
+     && apt-get clean \
+     && rm -rf /var/lib/apt/lists/*
+
+ RUN python -m ensurepip --upgrade && python -m pip install --upgrade pip
+
+ RUN useradd -m -u 1001 hf-space
+ USER hf-space
+
+ ENV HOME=/home/hf-space \
+     PATH=/home/hf-space/.local/bin:${PATH} \
+     PYTHONPATH=/home/hf-space/app \
+     PYTHONUNBUFFERED=1 \
+     GRADIO_ALLOW_FLAGGING=never \
+     GRADIO_NUM_PORTS=1 \
+     GRADIO_SERVER_NAME=0.0.0.0 \
+     GRADIO_THEME=huggingface \
+     SYSTEM=spaces \
+     HF_HOME=/home/hf-space/app/hf-home
+
+ COPY --chown=hf-space:hf-space . ${HOME}/app
+
+ WORKDIR ${HOME}/app
+
+ RUN mkdir ${HF_HOME} && chmod a+rwx ${HF_HOME}
+
+ RUN pip install --no-cache-dir -r /home/hf-space/app/requirements.txt
+
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -1,12 +1,31 @@
  ---
- title: Evaluate Asr Outputs
- emoji: 👁
- colorFrom: indigo
- colorTo: blue
- sdk: gradio
- sdk_version: 5.32.1
- app_file: app.py
- pinned: false
+ license: apache-2.0
+ title: Evaluate ASR outputs
+ sdk: docker
+ emoji: 👀
+ colorFrom: green
+ colorTo: gray
+ short_description: 'Calculate WER/CER values from JSONL files made by ASR models'
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ ## Install
+
+ ```shell
+ uv venv --python 3.13.2
+
+ source .venv/bin/activate
+
+ uv pip install -r requirements.txt
+ ```
+
+ ## Build image
+
+ ```shell
+ docker build -t evaluate-asr-outputs .
+ ```
+
+ ## Run
+
+ ```shell
+ docker run -it --rm -p 8888:7860 evaluate-asr-outputs
+ ```
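The README does not show what the uploaded file should contain, but the schema can be read off the required columns checked in app.py below. Here is a minimal sketch of writing one compatible per-utterance record; the file name, timings, and transcripts are invented placeholders, only the keys come from the app:

```python
import json

# Hypothetical per-utterance record matching app.py's required columns:
# filename, inference_start, inference_end, inference_total, duration,
# reference, prediction. All values below are made up for illustration.
record = {
    "filename": "audio_0001.wav",   # path of the evaluated audio file
    "inference_start": 100.0,       # timestamp when inference started, seconds
    "inference_end": 100.8,         # timestamp when inference finished, seconds
    "inference_total": 0.8,         # inference_end - inference_start
    "duration": 5.2,                # audio duration, seconds
    "reference": "hello world",     # ground-truth transcript
    "prediction": "hello word",     # ASR model output
}

# JSONL: one JSON object per line.
with open("evaluation_results.jsonl", "a", encoding="utf-8") as f:
    f.write(json.dumps(record, ensure_ascii=False) + "\n")
```

In batch mode the same inference_start/inference_end/inference_total fields are kept, while filenames, durations, references, and predictions become list-valued columns, one list per processed batch.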
app.py ADDED
@@ -0,0 +1,310 @@
+ import sys
+ import re
+
+ from importlib.metadata import version
+
+ import evaluate
+ import polars as pl
+ import gradio as gr
+ from natsort import natsorted
+
+ # Load evaluators
+ wer = evaluate.load("wer")
+ cer = evaluate.load("cer")
+
+ # Config
+ concurrency_limit = 5
+
+ title = "Evaluate ASR Outputs"
+
+ # https://www.tablesgenerator.com/markdown_tables
+ authors_table = """
+ ## Authors
+
+ Follow them on social networks and **contact** if you need any help or have any questions:
+
+ | <img src="https://avatars.githubusercontent.com/u/7875085?v=4" width="100"> **Yehor Smoliakov** |
+ |-------------------------------------------------------------------------------------------------|
+ | https://t.me/smlkw in Telegram |
+ | https://x.com/yehor_smoliakov at X |
+ | https://github.com/egorsmkv at GitHub |
+ | https://huggingface.co/Yehor at Hugging Face |
+ | or use egorsmkv@gmail.com |
+ """.strip()
+
+ examples = [
+     ["evaluation_results.jsonl", True, False, False, False],
+     ["evaluation_results_batch.jsonl", True, False, False, True],
+ ]
+
+ description_head = f"""
+ # {title}
+
+ ## Overview
+
+ Upload a JSONL file generated by the ASR model.
+ """.strip()
+
+ description_foot = f"""
+ {authors_table}
+ """.strip()
+
+ metrics_value = """
+ Metrics will appear here.
+ """.strip()
+
+ tech_env = f"""
+ #### Environment
+
+ - Python: {sys.version}
+ """.strip()
+
+ tech_libraries = f"""
+ #### Libraries
+
+ - evaluate: {version("evaluate")}
+ - gradio: {version("gradio")}
+ - jiwer: {version("jiwer")}
+ - polars: {version("polars")}
+ """.strip()
+
+
+ def clean_value(x):
+     s = (
+         x.replace("’", "'")
+         .strip()
+         .lower()
+         .replace(":", " ")
+         .replace(",", " ")
+         .replace(".", " ")
+         .replace("?", " ")
+         .replace("!", " ")
+         .replace("–", " ")
+         .replace("«", " ")
+         .replace("»", " ")
+         .replace("—", " ")
+         .replace("…", " ")
+         .replace("/", " ")
+         .replace("\\", " ")
+         .replace("(", " ")
+         .replace(")", " ")
+         .replace("́", "")
+         .replace('"', " ")
+     )
+
+     s = re.sub(r" +", " ", s)
+
+     s = s.strip()
+
+     # print(s)
+
+     return s
+
+
+ def inference(file_name, _clear_punctuation_references, _clear_punctuation_predictions, _show_chars, _batch_mode):
+     if not file_name:
+         raise gr.Error("Please upload your JSONL file.")
+
+     df = pl.read_ndjson(file_name)
+     total_rows = len(df)
+     df = df.drop_nulls()
+     filtered_rows = len(df)
+
+     if total_rows != filtered_rows:
+         gr.Info(f"Total rows in the file: {total_rows}, but after dropping rows with NULL values there are: {filtered_rows} rows. Seems like a corrupted file.")
+
+     required_columns = [
+         "filename",
+         "inference_start",
+         "inference_end",
+         "inference_total",
+         "duration",
+         "reference",
+         "prediction",
+     ]
+     required_columns_batch = [
+         "inference_start",
+         "inference_end",
+         "inference_total",
+         "filenames",
+         "durations",
+         "references",
+         "predictions",
+     ]
+
+     inference_seconds = df["inference_total"].sum()
+
+     if _batch_mode:
+         if not all(col in df.columns for col in required_columns_batch):
+             raise gr.Error(
+                 f"Please provide a JSONL file with the following columns: {required_columns_batch}"
+             )
+
+         duration_seconds = 0
+         for durations in df["durations"]:
+             duration_seconds += durations.sum()
+
+         rtf = inference_seconds / duration_seconds
+
+         references_batch = df["references"]
+         predictions_batch = df["predictions"]
+
+         references = []
+         for reference in references_batch:
+             if _clear_punctuation_references:
+                 reference = reference.map_elements(
+                     clean_value, return_dtype=pl.String
+                 )
+                 references.extend(reference)
+             else:
+                 references.extend(reference)
+
+         predictions = []
+         for prediction in predictions_batch:
+             if _clear_punctuation_predictions:
+                 prediction = prediction.map_elements(
+                     clean_value, return_dtype=pl.String
+                 )
+                 predictions.extend(prediction)
+             else:
+                 predictions.extend(prediction)
+     else:
+         if not all(col in df.columns for col in required_columns):
+             raise gr.Error(
+                 f"Please provide a JSONL file with the following columns: {required_columns}"
+             )
+
+         duration_seconds = df["duration"].sum()
+
+         rtf = inference_seconds / duration_seconds
+
+         if _clear_punctuation_references:
+             references = df["reference"].map_elements(
+                 clean_value, return_dtype=pl.String
+             )
+         else:
+             references = df["reference"]
+
+         if _clear_punctuation_predictions:
+             predictions = df["prediction"].map_elements(
+                 clean_value, return_dtype=pl.String
+             )
+         else:
+             predictions = df["prediction"]
+
+     n_predictions = len(predictions)
+     n_references = len(references)
+
+     # Evaluate
+     wer_value = round(wer.compute(predictions=predictions, references=references), 4)
+     cer_value = round(cer.compute(predictions=predictions, references=references), 4)
+
+     inference_time = inference_seconds
+     audio_duration = duration_seconds
+
+     rtf = inference_time / audio_duration
+
+     results = []
+
+     results.append(
+         f"- Number of references / predictions: {n_references} / {n_predictions}"
+     )
+     results.append("")
+     results.append(f"- WER: {wer_value} metric, {round(wer_value * 100, 4)}%")
+     results.append(f"- CER: {cer_value} metric, {round(cer_value * 100, 4)}%")
+     results.append("")
+     results.append(f"- Accuracy on words: {round(100 - 100 * wer_value, 4)}%")
+     results.append(f"- Accuracy on chars: {round(100 - 100 * cer_value, 4)}%")
+     results.append("")
+     results.append(
+         f"- Inference time: {round(inference_time, 4)} seconds, {round(inference_time / 60, 4)} mins, {round(inference_time / 60 / 60, 4)} hours"
+     )
+     results.append(
+         f"- Audio duration: {round(audio_duration, 4)} seconds, {round(audio_duration / 60 / 60, 4)} hours"
+     )
+     results.append("")
+     results.append(f"- RTF: {round(rtf, 4)}")
+
+     if _show_chars:
+         all_chars = set()
+         for ref in references:
+             for c in ref:
+                 all_chars.add(c)
+
+         sorted_chars = natsorted(list(all_chars))
+
+         results.append("")
+         results.append("Chars in references:")
+         results.append(f"{sorted_chars}")
+
+         all_chars = set()
+         for pred in predictions:
+             for c in pred:
+                 all_chars.add(c)
+
+         sorted_chars = natsorted(list(all_chars))
+
+         results.append("")
+         results.append("Chars in predictions:")
+         results.append(f"{sorted_chars}")
+
+     return "\n".join(results)
+
+
+ demo = gr.Blocks(
+     title=title,
+     analytics_enabled=False,
+     theme=gr.themes.Base(),
+ )
+
+ with demo:
+     gr.Markdown(description_head)
+
+     gr.Markdown("## Usage")
+
+     with gr.Row():
+         with gr.Column():
+             jsonl_file = gr.File(label="A JSONL file")
+
+             clear_punctuation_references = gr.Checkbox(
+                 label="Clear punctuation (in references), some chars and convert to lowercase",
+             )
+             clear_punctuation_predictions = gr.Checkbox(
+                 label="Clear punctuation (in predictions), some chars and convert to lowercase",
+             )
+             show_chars = gr.Checkbox(
+                 label="Show chars in references/predictions",
+             )
+             batch_mode = gr.Checkbox(
+                 label="Use batch mode",
+             )
+
+         metrics = gr.Textbox(
+             label="Metrics",
+             placeholder=metrics_value,
+             show_copy_button=True,
+         )
+
+     gr.Button("Calculate").click(
+         inference,
+         concurrency_limit=concurrency_limit,
+         inputs=[jsonl_file, clear_punctuation_references, clear_punctuation_predictions, show_chars, batch_mode],
+         outputs=metrics,
+     )
+
+     with gr.Row():
+         gr.Examples(
+             label="Choose an example",
+             inputs=[jsonl_file, clear_punctuation_references, clear_punctuation_predictions, show_chars, batch_mode],
+             examples=examples,
+         )
+
+     gr.Markdown(description_foot)
+
+     gr.Markdown("### Gradio app uses:")
+     gr.Markdown(tech_env)
+     gr.Markdown(tech_libraries)
+
+ if __name__ == "__main__":
+     demo.queue()
+     demo.launch()
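Outside the Gradio UI, the non-batch path of inference() boils down to reading the JSONL with polars and handing the reference and prediction columns to the evaluate metrics. A minimal standalone sketch, assuming a per-utterance evaluation_results.jsonl in the working directory and skipping the optional punctuation clean-up:

```python
import evaluate
import polars as pl

# Read the per-utterance JSONL and drop rows with NULL values, as the app does.
df = pl.read_ndjson("evaluation_results.jsonl").drop_nulls()

references = df["reference"].to_list()
predictions = df["prediction"].to_list()

# Word and character error rates via the Hugging Face evaluate metrics.
wer_value = evaluate.load("wer").compute(predictions=predictions, references=references)
cer_value = evaluate.load("cer").compute(predictions=predictions, references=references)

# Real-time factor: total inference time divided by total audio duration.
rtf = df["inference_total"].sum() / df["duration"].sum()

print(f"WER: {wer_value:.4f}, CER: {cer_value:.4f}, RTF: {rtf:.4f}")
```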
evaluation_results.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
evaluation_results_batch.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ gradio==5.23.0
+
+ evaluate==0.4.3
+ jiwer==3.1.0
+ polars==1.26.0
+ natsort==8.4.0