Spaces:

yoad
/

visualize_eval_results

Running

App Files Files Community

Yoad committed about 1 month ago

Commit

7b3ae60

1 Parent(s): abf28c9

Add subs visualizer

Browse files

Files changed (3) hide show

src/app.py +23 -16
src/substitutions_visualizer.py +101 -0
src/visual_eval/evaluator.py +149 -17

src/app.py CHANGED Viewed

@@ -12,6 +12,7 @@ from huggingface_hub import HfFileSystem
 from st_fixed_container import st_fixed_container
 from visual_eval.evaluator import HebrewTextNormalizer
 from visual_eval.visualization import render_visualize_jiwer_result_html
 HF_API_TOKEN = None
 try:
@@ -377,6 +378,8 @@ def main():
             # Toggle for normalized vs raw text
             use_normalized = st.sidebar.toggle("Use normalized text", value=True)
             # Create sidebar for entry selection
             st.sidebar.header("Select Entry")
@@ -495,22 +498,26 @@ def main():
             html = render_visualize_jiwer_result_html(ref, hyp)
             display_rtl(html)
-            # Display metadata
-            st.header("Metadata")
-            metadata_cols = [
-                "metadata_uuid",
-                "model",
-                "dataset",
-                "dataset_split",
-                "engine",
-            ]
-            metadata = eval_results.iloc[selected_entry][metadata_cols]
-            # Create a DataFrame for better display
-            metadata_df = pd.DataFrame(
-                {"Field": metadata_cols, "Value": [str(v) for v in metadata.values]}
-            )
-            st.table(metadata_df)
             # If we have audio URL, display it in the sticky container
             if "audio_url" in locals() and audio_url:

 from st_fixed_container import st_fixed_container
 from visual_eval.evaluator import HebrewTextNormalizer
 from visual_eval.visualization import render_visualize_jiwer_result_html
+from substitutions_visualizer import visualize_substitutions
 HF_API_TOKEN = None
 try:
             # Toggle for normalized vs raw text
             use_normalized = st.sidebar.toggle("Use normalized text", value=True)
+            show_metadata = st.sidebar.toggle("Show entry metadata", value=False)
+            visualize_subs = st.sidebar.toggle("List Substitutions", value=False)
             # Create sidebar for entry selection
             st.sidebar.header("Select Entry")
             html = render_visualize_jiwer_result_html(ref, hyp)
             display_rtl(html)
+            if show_metadata:
+                # Display metadata
+                st.header("Metadata")
+                metadata_cols = [
+                    "metadata_uuid",
+                    "model",
+                    "dataset",
+                    "dataset_split",
+                    "engine",
+                ]
+                metadata = eval_results.iloc[selected_entry][metadata_cols]
+                # Create a DataFrame for better display
+                metadata_df = pd.DataFrame(
+                    {"Field": metadata_cols, "Value": [str(v) for v in metadata.values]}
+                )
+                st.table(metadata_df)
+            if visualize_subs:
+                visualize_substitutions(ref, hyp)
             # If we have audio URL, display it in the sticky container
             if "audio_url" in locals() and audio_url:

src/substitutions_visualizer.py ADDED Viewed

	@@ -0,0 +1,101 @@

+import pandas as pd
+import streamlit as st
+from jiwer import process_words
+from visual_eval.evaluator import extract_substitution_samples, HebrewTextNormalizer
# Inline stylesheet for the substitutions table. Each substitution is drawn as
# a pair of rows (reference row, then hypothesis row); the hover rules
# highlight both rows of the pair together.
subs_table_styles = """
<style>
    .sub-table {
        background: white;
        width: 100%;
        border-collapse: collapse;
        color: black;
    }
    .sub-row {
        cursor: pointer;
        transition: all 0.2s;
    }
    .sub-row .txt {
        text-align: center;
    }
    .sub-row:nth-child(even):hover {
        background: #eee;
    }
    .sub-row:nth-child(odd):hover {
        background: #eee;
    }
    .sub-row:nth-child(even):hover + .sub-row {
        background: #eee;
    }
    .sub-row:nth-child(even):has(+ .sub-row:hover) {
        background: #eee;
    }
    .sub-row.ref {
        color: green;
    }
    .sub-row.ref .ctx {
        text-align: end;
    }
    .sub-row.hyp {
        color: red;
        border-bottom: 1px solid black;
    }
    .sub-row.hyp .ctx {
        text-align: start;
    }
</style>
"""


# NOTE(review): this function returns nothing and only renders Streamlit
# elements; it relies on st.cache_data replaying those elements on a cache
# hit - confirm against the Streamlit version in use.
@st.cache_data
def visualize_substitutions(ref, hyp):
    """Render a table of the substitution errors between two transcripts.

    Both texts are normalized with ``HebrewTextNormalizer``, aligned with
    ``jiwer.process_words``, and every sample produced by
    ``extract_substitution_samples`` is shown as two table rows: the reference
    words with their surrounding reference context, then the hypothesis words
    with their surrounding hypothesis context.

    Args:
        ref: Reference (ground-truth) text.
        hyp: Hypothesis (model output) text.
    """
    # Local import keeps this block self-contained; the transcript text is
    # external data and must not be interpreted as markup.
    from html import escape

    norm = HebrewTextNormalizer()
    wer_word_output = process_words(norm(ref), norm(hyp))
    # A single sentence pair is aligned, so only index 0 is populated.
    ref_tokens = wer_word_output.references[0]
    hyp_tokens = wer_word_output.hypotheses[0]

    sub_rows_html = []
    for sample in extract_substitution_samples(wer_word_output):
        ref_txt = escape(" ".join(sample.ref))
        hyp_txt = escape(" ".join(sample.hyp))
        # Context spans are (start, end) index pairs used as half-open slices.
        ref_ctx = escape(" ".join(ref_tokens[slice(*sample.ref_context_span)]))
        hyp_ctx = escape(" ".join(hyp_tokens[slice(*sample.hyp_context_span)]))
        sub_rows_html.append(
            f"""
            <tr class="sub-row ref">
                <td class="ctx">{ref_ctx}</td>
                <td class="txt">{ref_txt}</td>
                <td></td>
            </tr>
            <tr class="sub-row hyp">
                <td></td>
                <td class="txt">{hyp_txt}</td>
                <td class="ctx">{hyp_ctx}</td>
            </tr>
            """
        )

    st.subheader("Substitutions List")
    table_html = f"""
{subs_table_styles}
<table class="sub-table" dir="rtl" lang="he">
    <tr>
        <th style="text-align: end;">Ref Context</th>
        <th style="text-align: center;">Ref/Hyp</th>
        <th style="text-align: start;">Hyp Context</th>
    </tr>
    {"".join(sub_rows_html)}
</table>
"""
    st.html(table_html)

src/visual_eval/evaluator.py CHANGED Viewed

@@ -1,23 +1,9 @@
-"""
-Evaluator module.
-Provides functions to evaluate a given model on a dataset sample using the Faster Whisper model,
-and generate HTML visualization blocks of the word alignment.
-"""
-import concurrent.futures
-import gc
-import io
-import queue
-import threading
-from typing import Dict, Generator, List
-import soundfile as sf
 from hebrew import Hebrew
-from tqdm import tqdm
 from transformers.models.whisper.english_normalizer import BasicTextNormalizer
-from visual_eval.visualization import render_visualize_jiwer_result_html
 class HebrewTextNormalizer(BasicTextNormalizer):
     def __init__(self, *args, **kwargs):
@@ -54,3 +40,149 @@ class HebrewTextNormalizer(BasicTextNormalizer):
         text = self.__remove_quotes(text)
         text = super().__call__(text)
         return text

+from dataclasses import dataclass
 from hebrew import Hebrew
+from jiwer import process_words
 from transformers.models.whisper.english_normalizer import BasicTextNormalizer
 class HebrewTextNormalizer(BasicTextNormalizer):
     def __init__(self, *args, **kwargs):
         text = self.__remove_quotes(text)
         text = super().__call__(text)
         return text
# Number of words of surrounding context added on each side of an alignment
# chunk when its context span is computed.
context_expansion_size = 4


@dataclass
class SubsSample:
    """One substitution example taken from a word-level alignment.

    The context spans are ``(start, end)`` word-index pairs that consumers
    apply as half-open slices over the reference / hypothesis word lists.
    """

    # Index range of the surrounding context within the reference words.
    ref_context_span: tuple[int, int]
    # Index range of the surrounding context within the hypothesis words.
    hyp_context_span: tuple[int, int]
    # Reference-side words of the sample.
    ref: list[str]
    # Hypothesis-side words of the sample.
    hyp: list[str]
def merge_spans(span1, span2):
    """Return the smallest ``(start, end)`` span that covers both inputs.

    Args:
        span1: First span; index 0 is the start, index 1 is the end.
        span2: Second span, same layout.

    Returns:
        A tuple of the lower of the two starts and the higher of the two ends.
    """
    starts = (span1[0], span2[0])
    ends = (span1[1], span2[1])
    return min(starts), max(ends)
def merge_sub_samples(sub_samples: list[SubsSample]):
    """Fold several samples into a single sample, left to right.

    Context spans are combined with ``merge_spans`` and the word lists are
    concatenated in input order.

    Args:
        sub_samples: Samples to combine.

    Returns:
        ``None`` for an empty input, the sole sample itself when only one is
        given, otherwise a new ``SubsSample`` covering all of them.
    """
    combined = None
    for current in sub_samples:
        if combined is None:
            # The first sample seeds the accumulator unchanged.
            combined = current
            continue

        ref_span = merge_spans(combined.ref_context_span, current.ref_context_span)
        hyp_span = merge_spans(combined.hyp_context_span, current.hyp_context_span)
        ref_words = combined.ref + current.ref
        hyp_words = combined.hyp + current.hyp
        combined = SubsSample(
            ref_context_span=ref_span,
            hyp_context_span=hyp_span,
            ref=ref_words,
            hyp=hyp_words,
        )
    return combined
def get_aligned_chunk_words(wer_word_output, chunk, context_size=None):
    """Return the words of one alignment chunk plus its context spans.

    Args:
        wer_word_output: Alignment result exposing ``references[0]`` and
            ``hypotheses[0]`` as word lists.
        chunk: Alignment chunk with ``type``, ``ref_start_idx``,
            ``ref_end_idx``, ``hyp_start_idx`` and ``hyp_end_idx``.
        context_size: Number of context words to include on each side of the
            chunk. Defaults to the module-level ``context_expansion_size``.

    Returns:
        ``(ref_words, hyp_words, ref_context_span, hyp_context_span)``.
        For "delete" chunks ``hyp_words`` is a list of empty strings as long
        as ``ref_words`` (and vice versa for "insert"), so both sides always
        have the same length. For an unrecognized chunk type both word lists
        are ``None``. The spans are ``(start, end)`` tuples clamped to the
        bounds of the corresponding word list.
    """
    if context_size is None:
        context_size = context_expansion_size

    references = wer_word_output.references[0]
    hypotheses = wer_word_output.hypotheses[0]

    # Tuples (not lists) so the spans match SubsSample's annotations and the
    # tuples produced by merge_spans.
    ref_context_span = (
        max(0, chunk.ref_start_idx - context_size),
        min(chunk.ref_end_idx + context_size, len(references)),
    )
    hyp_context_span = (
        max(0, chunk.hyp_start_idx - context_size),
        min(chunk.hyp_end_idx + context_size, len(hypotheses)),
    )

    if chunk.type in ("equal", "substitute"):
        ref_words = references[chunk.ref_start_idx : chunk.ref_end_idx]
        hyp_words = hypotheses[chunk.hyp_start_idx : chunk.hyp_end_idx]
    elif chunk.type == "delete":
        ref_words = references[chunk.ref_start_idx : chunk.ref_end_idx]
        # Pad the missing side so ref/hyp stay position-aligned.
        hyp_words = [""] * len(ref_words)
    elif chunk.type == "insert":
        hyp_words = hypotheses[chunk.hyp_start_idx : chunk.hyp_end_idx]
        ref_words = [""] * len(hyp_words)
    else:
        # Unknown chunk type: keep the original best-effort behavior.
        ref_words = None
        hyp_words = None

    return ref_words, hyp_words, ref_context_span, hyp_context_span
def _chunk_to_sample(wer_word_output, chunk):
    """Build a SubsSample from a single alignment chunk."""
    ref_words, hyp_words, ref_context_span, hyp_context_span = (
        get_aligned_chunk_words(wer_word_output, chunk)
    )
    return SubsSample(
        ref_context_span=ref_context_span,
        hyp_context_span=hyp_context_span,
        ref=ref_words,
        hyp=hyp_words,
    )


def extract_substitution_samples(wer_word_output) -> list[SubsSample]:
    """Collect substitution samples from the first sentence's alignment.

    Walks ``wer_word_output.alignments[0]`` in order. Each "substitute" chunk
    yields a sample; an "insert"/"delete" chunk directly adjacent to it is
    merged into the same sample:

    * substitute followed by insert/delete: the substitute is deferred and
      emitted, merged, when the following chunk is visited;
    * insert/delete followed by substitute (which is not itself followed by
      an insert/delete): the two are merged when the substitute is visited.

    A chunk that has been stored is not reused as the "previous" chunk for
    the next iteration.

    Args:
        wer_word_output: Alignment result exposing ``alignments[0]`` as a
            list of chunks, plus the word lists read by
            ``get_aligned_chunk_words``.

    Returns:
        The extracted samples, in alignment order.
    """
    gap_types = ("insert", "delete")
    subs_samples = []
    prev_chunk = None
    all_chunks = wer_word_output.alignments[0]

    # Pair every chunk with its successor; the last chunk is paired with None.
    for chunk, next_chunk in zip(all_chunks, all_chunks[1:] + [None]):
        sample_to_store = None

        if chunk.type in gap_types:
            # Only interesting when it trails a substitution that deferred
            # itself on the previous iteration.
            if prev_chunk is not None and prev_chunk.type == "substitute":
                prev_sample = _chunk_to_sample(wer_word_output, prev_chunk)
                sample = _chunk_to_sample(wer_word_output, chunk)
                sample_to_store = merge_sub_samples([prev_sample, sample])
        elif chunk.type == "substitute":
            followed_by_gap = (
                next_chunk is not None and next_chunk.type in gap_types
            )
            # When followed by a gap, do nothing here: the next chunk will
            # capture this one.
            if not followed_by_gap:
                prev_sample = None
                if prev_chunk is not None and prev_chunk.type in gap_types:
                    prev_sample = _chunk_to_sample(wer_word_output, prev_chunk)
                sample = _chunk_to_sample(wer_word_output, chunk)
                sample_to_store = (
                    merge_sub_samples([prev_sample, sample])
                    if prev_sample is not None
                    else sample
                )

        if sample_to_store is not None:
            subs_samples.append(sample_to_store)
            prev_chunk = None  # consume once
        else:
            prev_chunk = chunk

    return subs_samples