marquesafonso committed on
Commit
810585d
·
1 Parent(s): 092cfe0

add json output for word highlighting

Browse files
Files changed (3) hide show
  1. .gitignore +4 -1
  2. app.py +9 -2
  3. src/transcriber.py +28 -4
.gitignore CHANGED
@@ -2,4 +2,7 @@
2
  .python-version
3
  pyproject.toml
4
  uv.lock
5
- */__pycache__/
 
 
 
 
2
  .python-version
3
  pyproject.toml
4
  uv.lock
5
+ */__pycache__/
6
+ Dockerfile
7
+ docker-compose.yml
8
+ .gradio
app.py CHANGED
@@ -1,5 +1,10 @@
1
  import gradio as gr
2
  from src.transcriber import transcriber
 
 
 
 
 
3
 
4
  def main():
5
  with gr.Blocks(title='multilang-asr-transcriber', delete_cache=(86400, 86400), theme=gr.themes.Base()) as demo:
@@ -18,10 +23,11 @@ def main():
18
  text_output = gr.Textbox(label="SRT Text transcription")
19
  srt_file = gr.File(file_count="single", type="filepath", file_types=[".srt"], label="SRT file")
20
  text_clean_output = gr.Textbox(label="Text transcription")
 
21
  gr.Interface(
22
  fn=transcriber,
23
  inputs=[file, file_type, max_words_per_line, task, model_version],
24
- outputs=[text_output, srt_file, text_clean_output],
25
  allow_flagging="never"
26
  )
27
  with gr.Tab("Audio", id = "audio"):
@@ -36,10 +42,11 @@ def main():
36
  text_output = gr.Textbox(label="SRT Text transcription")
37
  srt_file = gr.File(file_count="single", type="filepath", file_types=[".srt"], label="SRT file")
38
  text_clean_output = gr.Textbox(label="Text transcription")
 
39
  gr.Interface(
40
  fn=transcriber,
41
  inputs=[file, file_type, max_words_per_line, task, model_version],
42
- outputs=[text_output, srt_file, text_clean_output],
43
  allow_flagging="never"
44
  )
45
  demo.launch()
 
1
  import gradio as gr
2
  from src.transcriber import transcriber
3
+ import os
4
+
5
+ # os.environ["GRADIO_TEMP_DIR"] = os.path.join(os.getcwd(), "tmp")
6
+ # os.environ["HF_HOME"] = os.path.join(os.getcwd(), "hf_tmp")
7
+
8
 
9
  def main():
10
  with gr.Blocks(title='multilang-asr-transcriber', delete_cache=(86400, 86400), theme=gr.themes.Base()) as demo:
 
23
  text_output = gr.Textbox(label="SRT Text transcription")
24
  srt_file = gr.File(file_count="single", type="filepath", file_types=[".srt"], label="SRT file")
25
  text_clean_output = gr.Textbox(label="Text transcription")
26
+ json_output = gr.JSON("JSON Transcription")
27
  gr.Interface(
28
  fn=transcriber,
29
  inputs=[file, file_type, max_words_per_line, task, model_version],
30
+ outputs=[text_output, srt_file, text_clean_output, json_output],
31
  allow_flagging="never"
32
  )
33
  with gr.Tab("Audio", id = "audio"):
 
42
  text_output = gr.Textbox(label="SRT Text transcription")
43
  srt_file = gr.File(file_count="single", type="filepath", file_types=[".srt"], label="SRT file")
44
  text_clean_output = gr.Textbox(label="Text transcription")
45
+ json_output = gr.JSON("JSON Transcription")
46
  gr.Interface(
47
  fn=transcriber,
48
  inputs=[file, file_type, max_words_per_line, task, model_version],
49
+ outputs=[text_output, srt_file, text_clean_output, json_output],
50
  allow_flagging="never"
51
  )
52
  demo.launch()
src/transcriber.py CHANGED
@@ -24,24 +24,48 @@ def write_srt(segments, max_words_per_line, srt_path):
24
  with open(srt_path, "w", encoding='utf-8') as file:
25
  result = ''
26
  result_clean = []
 
 
 
 
27
  line_counter = 1
28
  for _, segment in enumerate(segments):
29
  words_in_line = []
30
  for w, word in enumerate(segment.words):
31
  words_in_line.append(word)
 
32
  # Write the line if max words limit reached or it's the last word in the segment
33
  if len(words_in_line) == max_words_per_line or w == len(segment.words) - 1:
34
- if words_in_line: # Check to avoid writing a line if there are no words
35
  start_time = convert_seconds_to_time(words_in_line[0].start)
36
  end_time = convert_seconds_to_time(words_in_line[-1].end)
37
  line_text = ' '.join([w.word.strip() for w in words_in_line])
 
 
38
  result += f"{line_counter}\n{start_time} --> {end_time}\n{line_text}\n\n"
39
  result_clean += [line_text]
40
- # Reset for the next line and increment line counter
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  line_counter += 1
42
- words_in_line = [] # Reset words list for the next line
 
43
  file.write(result)
44
- return result, srt_path, " ".join(result_clean)
 
45
 
46
  def transcriber(file_input:gr.File,
47
  file_type: str,
 
24
  with open(srt_path, "w", encoding='utf-8') as file:
25
  result = ''
26
  result_clean = []
27
+ json_output = {
28
+ "lines": []
29
+ }
30
+
31
  line_counter = 1
32
  for _, segment in enumerate(segments):
33
  words_in_line = []
34
  for w, word in enumerate(segment.words):
35
  words_in_line.append(word)
36
+
37
  # Write the line if max words limit reached or it's the last word in the segment
38
  if len(words_in_line) == max_words_per_line or w == len(segment.words) - 1:
39
+ if words_in_line:
40
  start_time = convert_seconds_to_time(words_in_line[0].start)
41
  end_time = convert_seconds_to_time(words_in_line[-1].end)
42
  line_text = ' '.join([w.word.strip() for w in words_in_line])
43
+
44
+ # SRT format
45
  result += f"{line_counter}\n{start_time} --> {end_time}\n{line_text}\n\n"
46
  result_clean += [line_text]
47
+
48
+ # JSON format
49
+ json_output["lines"].append({
50
+ "line_index": line_counter,
51
+ "start": words_in_line[0].start,
52
+ "end": words_in_line[-1].end,
53
+ "text": line_text,
54
+ "words": [
55
+ {
56
+ "word": w.word.strip(),
57
+ "start": w.start,
58
+ "end": w.end
59
+ } for w in words_in_line
60
+ ]
61
+ })
62
+
63
  line_counter += 1
64
+ words_in_line = []
65
+
66
  file.write(result)
67
+ return result, srt_path, " ".join(result_clean), json_output
68
+
69
 
70
  def transcriber(file_input:gr.File,
71
  file_type: str,