Spaces:

marquesafonso
/

multilang-asr-transcriber

Running

App Files Community

marquesafonso committed on 4 days ago

Commit

810585d

1 Parent(s): 092cfe0

add json output for word highlighting

Browse files

Files changed (3) hide show

.gitignore +4 -1
app.py +9 -2
src/transcriber.py +28 -4

.gitignore CHANGED Viewed

@@ -2,4 +2,7 @@
 .python-version
 pyproject.toml
 uv.lock
-*/__pycache__/

 .python-version
 pyproject.toml
 uv.lock
+*/__pycache__/
+Dockerfile
+docker-compose.yml
+.gradio

app.py CHANGED Viewed

@@ -1,5 +1,10 @@
 import gradio as gr
 from src.transcriber import transcriber
 def main():
     with gr.Blocks(title='multilang-asr-transcriber', delete_cache=(86400, 86400), theme=gr.themes.Base()) as demo:
@@ -18,10 +23,11 @@ def main():
                 text_output = gr.Textbox(label="SRT Text transcription")
                 srt_file = gr.File(file_count="single", type="filepath", file_types=[".srt"], label="SRT file")
                 text_clean_output = gr.Textbox(label="Text transcription")
                 gr.Interface(
                     fn=transcriber,
                     inputs=[file, file_type, max_words_per_line, task, model_version],
-                    outputs=[text_output, srt_file, text_clean_output],
                     allow_flagging="never"
                 )
             with gr.Tab("Audio", id = "audio"):
@@ -36,10 +42,11 @@ def main():
                 text_output = gr.Textbox(label="SRT Text transcription")
                 srt_file = gr.File(file_count="single", type="filepath", file_types=[".srt"], label="SRT file")
                 text_clean_output = gr.Textbox(label="Text transcription")
                 gr.Interface(
                     fn=transcriber,
                     inputs=[file, file_type, max_words_per_line, task, model_version],
-                    outputs=[text_output, srt_file, text_clean_output],
                     allow_flagging="never"
                 )
         demo.launch()

 import gradio as gr
 from src.transcriber import transcriber
+import os
+# os.environ["GRADIO_TEMP_DIR"] = os.path.join(os.getcwd(), "tmp")
+# os.environ["HF_HOME"] = os.path.join(os.getcwd(), "hf_tmp")
 def main():
     with gr.Blocks(title='multilang-asr-transcriber', delete_cache=(86400, 86400), theme=gr.themes.Base()) as demo:
                 text_output = gr.Textbox(label="SRT Text transcription")
                 srt_file = gr.File(file_count="single", type="filepath", file_types=[".srt"], label="SRT file")
                 text_clean_output = gr.Textbox(label="Text transcription")
+                json_output = gr.JSON("JSON Transcription")
                 gr.Interface(
                     fn=transcriber,
                     inputs=[file, file_type, max_words_per_line, task, model_version],
+                    outputs=[text_output, srt_file, text_clean_output, json_output],
                     allow_flagging="never"
                 )
             with gr.Tab("Audio", id = "audio"):
                 text_output = gr.Textbox(label="SRT Text transcription")
                 srt_file = gr.File(file_count="single", type="filepath", file_types=[".srt"], label="SRT file")
                 text_clean_output = gr.Textbox(label="Text transcription")
+                json_output = gr.JSON("JSON Transcription")
                 gr.Interface(
                     fn=transcriber,
                     inputs=[file, file_type, max_words_per_line, task, model_version],
+                    outputs=[text_output, srt_file, text_clean_output, json_output],
                     allow_flagging="never"
                 )
         demo.launch()

src/transcriber.py CHANGED Viewed

@@ -24,24 +24,48 @@ def write_srt(segments, max_words_per_line, srt_path):
     with open(srt_path, "w", encoding='utf-8') as file:
         result = ''
         result_clean = []
         line_counter = 1
         for _, segment in enumerate(segments):
             words_in_line = []
             for w, word in enumerate(segment.words):
                 words_in_line.append(word)
                 # Write the line if max words limit reached or it's the last word in the segment
                 if len(words_in_line) == max_words_per_line or w == len(segment.words) - 1:
-                    if words_in_line:  # Check to avoid writing a line if there are no words
                         start_time = convert_seconds_to_time(words_in_line[0].start)
                         end_time = convert_seconds_to_time(words_in_line[-1].end)
                         line_text = ' '.join([w.word.strip() for w in words_in_line])
                         result += f"{line_counter}\n{start_time} --> {end_time}\n{line_text}\n\n"
                         result_clean += [line_text]
-                        # Reset for the next line and increment line counter
                         line_counter += 1
-                    words_in_line = []  # Reset words list for the next line
         file.write(result)
-        return result, srt_path, " ".join(result_clean)
 def transcriber(file_input:gr.File,
                 file_type: str,

     with open(srt_path, "w", encoding='utf-8') as file:
         result = ''
         result_clean = []
+        json_output = {
+            "lines": []
+        }
         line_counter = 1
         for _, segment in enumerate(segments):
             words_in_line = []
             for w, word in enumerate(segment.words):
                 words_in_line.append(word)
                 # Write the line if max words limit reached or it's the last word in the segment
                 if len(words_in_line) == max_words_per_line or w == len(segment.words) - 1:
+                    if words_in_line:
                         start_time = convert_seconds_to_time(words_in_line[0].start)
                         end_time = convert_seconds_to_time(words_in_line[-1].end)
                         line_text = ' '.join([w.word.strip() for w in words_in_line])
+                        # SRT format
                         result += f"{line_counter}\n{start_time} --> {end_time}\n{line_text}\n\n"
                         result_clean += [line_text]
+                        # JSON format
+                        json_output["lines"].append({
+                            "line_index": line_counter,
+                            "start": words_in_line[0].start,
+                            "end": words_in_line[-1].end,
+                            "text": line_text,
+                            "words": [
+                                {
+                                    "word": w.word.strip(),
+                                    "start": w.start,
+                                    "end": w.end
+                                } for w in words_in_line
+                            ]
+                        })
                         line_counter += 1
+                    words_in_line = []
         file.write(result)
+        return result, srt_path, " ".join(result_clean), json_output
 def transcriber(file_input:gr.File,
                 file_type: str,