Luigi committed on
Commit
7c3ad3d
·
1 Parent(s): 264de1a

improve diarized transcript format

Browse files
Files changed (1) hide show
  1. app.py +19 -6
app.py CHANGED
@@ -57,23 +57,36 @@ sense_models = {}
57
  dar_pipe = None
58
 
59
  converter = opencc.OpenCC('s2t')
60
- # —————— Helpers ——————
 
61
  def format_diarization_html(snippets):
62
  palette = ["#e74c3c", "#3498db", "#27ae60", "#e67e22", "#9b59b6", "#16a085", "#f1c40f"]
63
  speaker_colors = {}
64
- html = ["<div style='font-family:monospace; line-height:1.5em;'>"]
 
65
  for s in snippets:
66
  if s.startswith("[") and "]" in s:
67
  spk, txt = s[1:].split("]", 1)
68
  spk, txt = spk.strip(), txt.strip()
69
  else:
70
- spk, txt = "", s
 
 
 
 
71
  if spk not in speaker_colors:
72
  speaker_colors[spk] = palette[len(speaker_colors) % len(palette)]
73
  color = speaker_colors[spk]
74
- html.append(f"<p style='margin:4px 0; color:{color};'><strong>{spk}:</strong> {txt}</p>")
75
- html.append("</div>")
76
- return "".join(html)
 
 
 
 
 
 
 
77
 
78
  # —————— Helpers ——————
79
  def get_whisper_pipe(model_id: str, device: int):
 
57
  dar_pipe = None
58
 
59
  converter = opencc.OpenCC('s2t')
60
+
61
+ # —————— Diarization Formatter ——————
62
  def format_diarization_html(snippets):
63
  palette = ["#e74c3c", "#3498db", "#27ae60", "#e67e22", "#9b59b6", "#16a085", "#f1c40f"]
64
  speaker_colors = {}
65
+ html_lines = []
66
+ last_spk = None
67
  for s in snippets:
68
  if s.startswith("[") and "]" in s:
69
  spk, txt = s[1:].split("]", 1)
70
  spk, txt = spk.strip(), txt.strip()
71
  else:
72
+ spk, txt = "", s.strip()
73
+ # hide empty lines
74
+ if not txt:
75
+ continue
76
+ # assign color if new speaker
77
  if spk not in speaker_colors:
78
  speaker_colors[spk] = palette[len(speaker_colors) % len(palette)]
79
  color = speaker_colors[spk]
80
+ # simplify tag for same speaker
81
+ if spk == last_spk:
82
+ display = txt
83
+ else:
84
+ display = f"<strong>{spk}:</strong> {txt}"
85
+ last_spk = spk
86
+ html_lines.append(
87
+ f"<p style='margin:4px 0; font-family:monospace; color:{color};'>{display}</p>"
88
+ )
89
+ return "<div>" + "".join(html_lines) + "</div>"
90
 
91
  # —————— Helpers ——————
92
  def get_whisper_pipe(model_id: str, device: int):