Spaces:
Running
on
Zero
Running
on
Zero
improve diarized transcript format
Browse files
app.py
CHANGED
@@ -57,23 +57,36 @@ sense_models = {}
|
|
57 |
dar_pipe = None
|
58 |
|
59 |
converter = opencc.OpenCC('s2t')
|
60 |
-
|
|
|
61 |
def format_diarization_html(snippets):
|
62 |
palette = ["#e74c3c", "#3498db", "#27ae60", "#e67e22", "#9b59b6", "#16a085", "#f1c40f"]
|
63 |
speaker_colors = {}
|
64 |
-
|
|
|
65 |
for s in snippets:
|
66 |
if s.startswith("[") and "]" in s:
|
67 |
spk, txt = s[1:].split("]", 1)
|
68 |
spk, txt = spk.strip(), txt.strip()
|
69 |
else:
|
70 |
-
spk, txt = "", s
|
|
|
|
|
|
|
|
|
71 |
if spk not in speaker_colors:
|
72 |
speaker_colors[spk] = palette[len(speaker_colors) % len(palette)]
|
73 |
color = speaker_colors[spk]
|
74 |
-
|
75 |
-
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
|
78 |
# —————— Helpers ——————
|
79 |
def get_whisper_pipe(model_id: str, device: int):
|
|
|
57 |
dar_pipe = None
|
58 |
|
59 |
converter = opencc.OpenCC('s2t')
|
60 |
+
|
61 |
+
# —————— Diarization Formatter ——————
|
62 |
def format_diarization_html(snippets):
    """Render diarized transcript snippets as a color-coded HTML fragment.

    Each snippet is a string, either tagged as ``"[speaker] text"`` or
    plain text with no speaker tag. Every distinct speaker is assigned a
    color from a fixed palette in order of first appearance (the palette
    wraps around when there are more speakers than colors). Consecutive
    snippets from the same speaker show the speaker label only once.

    Args:
        snippets: Iterable of transcript snippet strings.

    Returns:
        A single ``<div>`` string containing one ``<p>`` per non-empty
        snippet. Snippets whose text is empty after stripping are omitted.
    """
    # Local import keeps the function self-contained; the module's import
    # block is not guaranteed to provide `html`.
    from html import escape

    palette = ["#e74c3c", "#3498db", "#27ae60", "#e67e22", "#9b59b6", "#16a085", "#f1c40f"]
    speaker_colors = {}
    html_lines = []
    last_spk = None
    for s in snippets:
        if s.startswith("[") and "]" in s:
            spk, txt = s[1:].split("]", 1)
            spk, txt = spk.strip(), txt.strip()
        else:
            spk, txt = "", s.strip()
        # hide empty lines (before color assignment, so skipped snippets
        # never consume a palette slot)
        if not txt:
            continue
        # assign color if new speaker
        if spk not in speaker_colors:
            speaker_colors[spk] = palette[len(speaker_colors) % len(palette)]
        color = speaker_colors[spk]
        # Transcript text and speaker labels are data, not markup: escape
        # them so characters like "<" or "&" cannot break or inject HTML.
        safe_txt = escape(txt)
        # simplify tag for same speaker; untagged snippets get no label at
        # all rather than a dangling "<strong>:</strong>" prefix
        if spk == last_spk or not spk:
            display = safe_txt
        else:
            display = f"<strong>{escape(spk)}:</strong> {safe_txt}"
        last_spk = spk
        html_lines.append(
            f"<p style='margin:4px 0; font-family:monospace; color:{color};'>{display}</p>"
        )
    return "<div>" + "".join(html_lines) + "</div>"
|
90 |
|
91 |
# —————— Helpers ——————
|
92 |
def get_whisper_pipe(model_id: str, device: int):
|