Spaces:

speech-uk
/

evaluate-asr-outputs

Sleeping

App Files Files Community

Yehor committed on Jun 3

Commit

a9252f6

1 Parent(s): 8a89db0

Init

Browse files

Files changed (8) hide show

.dockerignore +2 -0
.gitignore +5 -0
Dockerfile +56 -0
README.md +28 -9
app.py +310 -0
evaluation_results.jsonl +0 -0
evaluation_results_batch.jsonl +0 -0
requirements.txt +6 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ .ruff_cache/
2	+ .venv/

.gitignore ADDED Viewed

	@@ -0,0 +1,5 @@

+.idea/
+.venv/
+.ruff_cache/
+flagged/

Dockerfile ADDED Viewed

	@@ -0,0 +1,56 @@

+# Image for the "Evaluate ASR outputs" Gradio app.
+FROM python:3.13.2-bookworm
+# Keep apt from prompting during the build.
+ENV DEBIAN_FRONTEND=noninteractive
+# System packages: VCS/download tools, Python build dependencies, and ffmpeg for Gradio.
+RUN apt-get update && \
+    apt-get upgrade -y && \
+    apt-get install -y --no-install-recommends \
+    git \
+    git-lfs \
+    wget \
+    curl \
+    ca-certificates \
+    # python build dependencies \
+    build-essential \
+    libssl-dev \
+    zlib1g-dev \
+    libbz2-dev \
+    libreadline-dev \
+    libsqlite3-dev \
+    libncursesw5-dev \
+    xz-utils \
+    tk-dev \
+    libxml2-dev \
+    libxmlsec1-dev \
+    libffi-dev \
+    liblzma-dev \
+    # gradio dependencies \
+    ffmpeg \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+RUN python -m ensurepip --upgrade && python -m pip install --upgrade pip
+# Run the app as an unprivileged user.
+RUN useradd -m -u 1001 hf-space
+USER hf-space
+ENV HOME=/home/hf-space \
+    PATH=/home/hf-space/.local/bin:${PATH} \
+    PYTHONPATH=/home/hf-space/app \
+    PYTHONUNBUFFERED=1 \
+    GRADIO_ALLOW_FLAGGING=never \
+    GRADIO_NUM_PORTS=1 \
+    GRADIO_SERVER_NAME=0.0.0.0 \
+    GRADIO_THEME=huggingface \
+    SYSTEM=spaces \
+    HF_HOME=/home/hf-space/app/hf-home
+COPY --chown=hf-space:hf-space . ${HOME}/app
+WORKDIR ${HOME}/app
+# -p: do not fail the build when the copied context already contains this directory.
+RUN mkdir -p ${HF_HOME} && chmod a+rwx ${HF_HOME}
+RUN pip install --no-cache-dir -r /home/hf-space/app/requirements.txt
+CMD ["python", "app.py"]

README.md CHANGED Viewed

@@ -1,12 +1,31 @@
 ---
-title: Evaluate Asr Outputs
-emoji: 👁
-colorFrom: indigo
-colorTo: blue
-sdk: gradio
-sdk_version: 5.32.1
-app_file: app.py
-pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+license: apache-2.0
+title: Evaluate ASR outputs
+sdk: docker
+emoji: 👀
+colorFrom: green
+colorTo: gray
+short_description: 'Calculate WER/CER values from JSONL files made by ASR models'
 ---
+## Install
+```shell
+uv venv --python 3.13.2
+source .venv/bin/activate
+uv pip install -r requirements.txt
+```
+## Build image
+```shell
+docker build -t evaluate-asr-outputs .
+```
+## Run
+```shell
+docker run -it --rm -p 8888:7860 evaluate-asr-outputs
+```

app.py ADDED Viewed

	@@ -0,0 +1,310 @@

+import sys
+import re
+from importlib.metadata import version
+import evaluate
+import polars as pl
+import gradio as gr
+from natsort import natsorted
+# Load evaluators
+wer = evaluate.load("wer")
+cer = evaluate.load("cer")
+# Config
+concurrency_limit = 5
+title = "Evaluate ASR Outputs"
+# https://www.tablesgenerator.com/markdown_tables
+authors_table = """
+## Authors
+Follow them on social networks and **contact** if you need any help or have any questions:
+| <img src="https://avatars.githubusercontent.com/u/7875085?v=4" width="100"> **Yehor Smoliakov** |
+|-------------------------------------------------------------------------------------------------|
+| https://t.me/smlkw in Telegram                                                                  |
+| https://x.com/yehor_smoliakov at X                                                              |
+| https://github.com/egorsmkv at GitHub                                                           |
+| https://huggingface.co/Yehor at Hugging Face                                                    |
+| or use egorsmkv@gmail.com                                                                       |
+""".strip()
+examples = [
+    ["evaluation_results.jsonl", True, False, False],
+    ["evaluation_results_batch.jsonl", True, False, True],
+]
+description_head = f"""
+# {title}
+## Overview
+Upload a JSONL file generated by the ASR model.
+""".strip()
+description_foot = f"""
+{authors_table}
+""".strip()
+metrics_value = """
+Metrics will appear here.
+""".strip()
+tech_env = f"""
+#### Environment
+- Python: {sys.version}
+""".strip()
+tech_libraries = f"""
+#### Libraries
+- evaluate: {version("evaluate")}
+- gradio: {version("gradio")}
+- jiwer: {version("jiwer")}
+- polars: {version("polars")}
+""".strip()
+def clean_value(x):
+    s = (
+        x.replace("’", "'")
+        .strip()
+        .lower()
+        .replace(":", " ")
+        .replace(",", " ")
+        .replace(".", " ")
+        .replace("?", " ")
+        .replace("!", " ")
+        .replace("–", " ")
+        .replace("«", " ")
+        .replace("»", " ")
+        .replace("—", " ")
+        .replace("…", " ")
+        .replace("/", " ")
+        .replace("\\", " ")
+        .replace("(", " ")
+        .replace(")", " ")
+        .replace("́", "")
+        .replace('"', " ")
+    )
+    s = re.sub(r" +", " ", s)
+    s = s.strip()
+    # print(s)
+    return s
+def inference(file_name, _clear_punctuation_references, _clear_punctuation_predictions, _show_chars, _batch_mode):
+    if not file_name:
+        raise gr.Error("Please paste your JSON file.")
+    df = pl.read_ndjson(file_name)
+    total_rows = len(df)
+    df = df.drop_nulls()
+    filtered_rows = len(df)
+    if total_rows != filtered_rows:
+        gr.Info(f"Total rows in the file: {total_rows}, but after dropping rows with NULL values there are: {filtered_rows} rows. Seems like a corrupted file.")
+    required_columns = [
+        "filename",
+        "inference_start",
+        "inference_end",
+        "inference_total",
+        "duration",
+        "reference",
+        "prediction",
+    ]
+    required_columns_batch = [
+        "inference_start",
+        "inference_end",
+        "inference_total",
+        "filenames",
+        "durations",
+        "references",
+        "predictions",
+    ]
+    inference_seconds = df["inference_total"].sum()
+    if _batch_mode:
+        if not all(col in df.columns for col in required_columns_batch):
+            raise gr.Error(
+                f"Please provide a JSONL file with the following columns: {required_columns_batch}"
+            )
+        duration_seconds = 0
+        for durations in df["durations"]:
+            duration_seconds += durations.sum()
+        rtf = inference_seconds / duration_seconds
+        references_batch = df["references"]
+        predictions_batch = df["predictions"]
+        references = []
+        for reference in references_batch:
+            if _clear_punctuation_references:
+                reference = reference.map_elements(
+                    clean_value, return_dtype=pl.String
+                )
+                references.extend(reference)
+            else:
+                references.extend(reference)
+        predictions = []
+        for prediction in predictions_batch:
+            if _clear_punctuation_predictions:
+                prediction = prediction.map_elements(
+                    clean_value, return_dtype=pl.String
+                )
+                predictions.extend(prediction)
+            else:
+                predictions.extend(prediction)
+    else:
+        if not all(col in df.columns for col in required_columns):
+            raise gr.Error(
+                f"Please provide a JSONL file with the following columns: {required_columns}"
+            )
+        duration_seconds = df["duration"].sum()
+        rtf = inference_seconds / duration_seconds
+        if _clear_punctuation_references:
+            references = df["reference"].map_elements(
+                clean_value, return_dtype=pl.String
+            )
+        else:
+            references = df["reference"]
+        if _clear_punctuation_predictions:
+            predictions = df["prediction"].map_elements(
+                clean_value, return_dtype=pl.String
+            )
+        else:
+            predictions = df["prediction"]
+    n_predictions = len(predictions)
+    n_references = len(references)
+    # Evaluate
+    wer_value = round(wer.compute(predictions=predictions, references=references), 4)
+    cer_value = round(cer.compute(predictions=predictions, references=references), 4)
+    inference_time = inference_seconds
+    audio_duration = duration_seconds
+    rtf = inference_time / audio_duration
+    results = []
+    results.append(
+        f"- Number of references / predictions: {n_references} / {n_predictions}"
+    )
+    results.append(f"")
+    results.append(f"- WER: {wer_value} metric, {round(wer_value * 100, 4)}%")
+    results.append(f"- CER: {cer_value} metric, {round(cer_value * 100, 4)}%")
+    results.append("")
+    results.append(f"- Accuracy on words: {round(100 - 100 * wer_value, 4)}%")
+    results.append(f"- Accuracy on chars: {round(100 - 100 * cer_value, 4)}%")
+    results.append("")
+    results.append(
+        f"- Inference time: {round(inference_time, 4)} seconds, {round(inference_time / 60, 4)} mins, {round(inference_time / 60 / 60, 4)} hours"
+    )
+    results.append(
+        f"- Audio duration: {round(audio_duration, 4)} seconds, {round(audio_duration / 60 / 60, 4)} hours"
+    )
+    results.append("")
+    results.append(f"- RTF: {round(rtf, 4)}")
+    if _show_chars:
+        all_chars = set()
+        for ref in references:
+            for c in ref:
+                all_chars.add(c)
+        sorted_chars = natsorted(list(all_chars))
+        results.append("")
+        results.append(f"Chars in references:")
+        results.append(f"{sorted_chars}")
+        all_chars = set()
+        for pred in predictions:
+            for c in pred:
+                all_chars.add(c)
+        sorted_chars = natsorted(list(all_chars))
+        results.append("")
+        results.append(f"Chars in predictions:")
+        results.append(f"{sorted_chars}")
+    return "\n".join(results)
+# Top-level Blocks container for the app, with analytics disabled and the Base theme.
+demo = gr.Blocks(
+    title=title,
+    analytics_enabled=False,
+    theme=gr.themes.Base(),
+)
+# Components created inside this context are attached to `demo`.
+with demo:
+    gr.Markdown(description_head)
+    gr.Markdown("## Usage")
+    with gr.Row():
+        # Left column: the uploaded file plus the four option checkboxes.
+        with gr.Column():
+            jsonl_file = gr.File(label="A JSONL file")
+            clear_punctuation_references = gr.Checkbox(
+                label="Clear punctuation (in references), some chars and convert to lowercase",
+            )
+            clear_punctuation_predictions = gr.Checkbox(
+                label="Clear punctuation (in predictions), some chars and convert to lowercase",
+            )
+            show_chars = gr.Checkbox(
+                label="Show chars in references/predictions",
+            )
+            batch_mode = gr.Checkbox(
+                label="Use batch mode",
+            )
+        # Output box in the same row; `metrics_value` is its placeholder text.
+        metrics = gr.Textbox(
+            label="Metrics",
+            placeholder=metrics_value,
+            show_copy_button=True,
+        )
+    # Clicking the button calls `inference`; the order of `inputs` must match
+    # the order of its parameters.
+    gr.Button("Calculate").click(
+        inference,
+        concurrency_limit=concurrency_limit,
+        inputs=[jsonl_file, clear_punctuation_references, clear_punctuation_predictions, show_chars, batch_mode],
+        outputs=metrics,
+    )
+    with gr.Row():
+        # Example rows fill the same input components, in the same order.
+        gr.Examples(
+            label="Choose an example",
+            inputs=[jsonl_file, clear_punctuation_references, clear_punctuation_predictions, show_chars, batch_mode],
+            examples=examples,
+        )
+    # Footer: author contacts, then the Python and library versions gathered at import time.
+    gr.Markdown(description_foot)
+    gr.Markdown("### Gradio app uses:")
+    gr.Markdown(tech_env)
+    gr.Markdown(tech_libraries)
+# Start the server only when the file is run as a script.
+if __name__ == "__main__":
+    demo.queue()
+    demo.launch()

evaluation_results.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

evaluation_results_batch.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+gradio==5.23.0
+evaluate==0.4.3
+jiwer==3.1.0
+polars==1.26.0
+natsort==8.4.0