Commit
·
810585d
1
Parent(s):
092cfe0
Add JSON output for word highlighting
Browse files
- .gitignore +4 -1
- app.py +9 -2
- src/transcriber.py +28 -4
.gitignore
CHANGED
@@ -2,4 +2,7 @@
|
|
2 |
.python-version
|
3 |
pyproject.toml
|
4 |
uv.lock
|
5 |
-
*/__pycache__/
|
|
|
|
|
|
|
|
2 |
.python-version
|
3 |
pyproject.toml
|
4 |
uv.lock
|
5 |
+
*/__pycache__/
|
6 |
+
Dockerfile
|
7 |
+
docker-compose.yml
|
8 |
+
.gradio
|
app.py
CHANGED
@@ -1,5 +1,10 @@
|
|
1 |
import gradio as gr
|
2 |
from src.transcriber import transcriber
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
def main():
|
5 |
with gr.Blocks(title='multilang-asr-transcriber', delete_cache=(86400, 86400), theme=gr.themes.Base()) as demo:
|
@@ -18,10 +23,11 @@ def main():
|
|
18 |
text_output = gr.Textbox(label="SRT Text transcription")
|
19 |
srt_file = gr.File(file_count="single", type="filepath", file_types=[".srt"], label="SRT file")
|
20 |
text_clean_output = gr.Textbox(label="Text transcription")
|
|
|
21 |
gr.Interface(
|
22 |
fn=transcriber,
|
23 |
inputs=[file, file_type, max_words_per_line, task, model_version],
|
24 |
-
outputs=[text_output, srt_file, text_clean_output],
|
25 |
allow_flagging="never"
|
26 |
)
|
27 |
with gr.Tab("Audio", id = "audio"):
|
@@ -36,10 +42,11 @@ def main():
|
|
36 |
text_output = gr.Textbox(label="SRT Text transcription")
|
37 |
srt_file = gr.File(file_count="single", type="filepath", file_types=[".srt"], label="SRT file")
|
38 |
text_clean_output = gr.Textbox(label="Text transcription")
|
|
|
39 |
gr.Interface(
|
40 |
fn=transcriber,
|
41 |
inputs=[file, file_type, max_words_per_line, task, model_version],
|
42 |
-
outputs=[text_output, srt_file, text_clean_output],
|
43 |
allow_flagging="never"
|
44 |
)
|
45 |
demo.launch()
|
|
|
1 |
import gradio as gr
|
2 |
from src.transcriber import transcriber
|
3 |
+
import os
|
4 |
+
|
5 |
+
# os.environ["GRADIO_TEMP_DIR"] = os.path.join(os.getcwd(), "tmp")
|
6 |
+
# os.environ["HF_HOME"] = os.path.join(os.getcwd(), "hf_tmp")
|
7 |
+
|
8 |
|
9 |
def main():
|
10 |
with gr.Blocks(title='multilang-asr-transcriber', delete_cache=(86400, 86400), theme=gr.themes.Base()) as demo:
|
|
|
23 |
text_output = gr.Textbox(label="SRT Text transcription")
|
24 |
srt_file = gr.File(file_count="single", type="filepath", file_types=[".srt"], label="SRT file")
|
25 |
text_clean_output = gr.Textbox(label="Text transcription")
|
26 |
+
json_output = gr.JSON("JSON Transcription")
|
27 |
gr.Interface(
|
28 |
fn=transcriber,
|
29 |
inputs=[file, file_type, max_words_per_line, task, model_version],
|
30 |
+
outputs=[text_output, srt_file, text_clean_output, json_output],
|
31 |
allow_flagging="never"
|
32 |
)
|
33 |
with gr.Tab("Audio", id = "audio"):
|
|
|
42 |
text_output = gr.Textbox(label="SRT Text transcription")
|
43 |
srt_file = gr.File(file_count="single", type="filepath", file_types=[".srt"], label="SRT file")
|
44 |
text_clean_output = gr.Textbox(label="Text transcription")
|
45 |
+
json_output = gr.JSON("JSON Transcription")
|
46 |
gr.Interface(
|
47 |
fn=transcriber,
|
48 |
inputs=[file, file_type, max_words_per_line, task, model_version],
|
49 |
+
outputs=[text_output, srt_file, text_clean_output, json_output],
|
50 |
allow_flagging="never"
|
51 |
)
|
52 |
demo.launch()
|
src/transcriber.py
CHANGED
@@ -24,24 +24,48 @@ def write_srt(segments, max_words_per_line, srt_path):
|
|
24 |
with open(srt_path, "w", encoding='utf-8') as file:
|
25 |
result = ''
|
26 |
result_clean = []
|
|
|
|
|
|
|
|
|
27 |
line_counter = 1
|
28 |
for _, segment in enumerate(segments):
|
29 |
words_in_line = []
|
30 |
for w, word in enumerate(segment.words):
|
31 |
words_in_line.append(word)
|
|
|
32 |
# Write the line if max words limit reached or it's the last word in the segment
|
33 |
if len(words_in_line) == max_words_per_line or w == len(segment.words) - 1:
|
34 |
-
if words_in_line:
|
35 |
start_time = convert_seconds_to_time(words_in_line[0].start)
|
36 |
end_time = convert_seconds_to_time(words_in_line[-1].end)
|
37 |
line_text = ' '.join([w.word.strip() for w in words_in_line])
|
|
|
|
|
38 |
result += f"{line_counter}\n{start_time} --> {end_time}\n{line_text}\n\n"
|
39 |
result_clean += [line_text]
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
line_counter += 1
|
42 |
-
words_in_line = []
|
|
|
43 |
file.write(result)
|
44 |
-
return result, srt_path, " ".join(result_clean)
|
|
|
45 |
|
46 |
def transcriber(file_input:gr.File,
|
47 |
file_type: str,
|
|
|
24 |
with open(srt_path, "w", encoding='utf-8') as file:
|
25 |
result = ''
|
26 |
result_clean = []
|
27 |
+
json_output = {
|
28 |
+
"lines": []
|
29 |
+
}
|
30 |
+
|
31 |
line_counter = 1
|
32 |
for _, segment in enumerate(segments):
|
33 |
words_in_line = []
|
34 |
for w, word in enumerate(segment.words):
|
35 |
words_in_line.append(word)
|
36 |
+
|
37 |
# Write the line if max words limit reached or it's the last word in the segment
|
38 |
if len(words_in_line) == max_words_per_line or w == len(segment.words) - 1:
|
39 |
+
if words_in_line:
|
40 |
start_time = convert_seconds_to_time(words_in_line[0].start)
|
41 |
end_time = convert_seconds_to_time(words_in_line[-1].end)
|
42 |
line_text = ' '.join([w.word.strip() for w in words_in_line])
|
43 |
+
|
44 |
+
# SRT format
|
45 |
result += f"{line_counter}\n{start_time} --> {end_time}\n{line_text}\n\n"
|
46 |
result_clean += [line_text]
|
47 |
+
|
48 |
+
# JSON format
|
49 |
+
json_output["lines"].append({
|
50 |
+
"line_index": line_counter,
|
51 |
+
"start": words_in_line[0].start,
|
52 |
+
"end": words_in_line[-1].end,
|
53 |
+
"text": line_text,
|
54 |
+
"words": [
|
55 |
+
{
|
56 |
+
"word": w.word.strip(),
|
57 |
+
"start": w.start,
|
58 |
+
"end": w.end
|
59 |
+
} for w in words_in_line
|
60 |
+
]
|
61 |
+
})
|
62 |
+
|
63 |
line_counter += 1
|
64 |
+
words_in_line = []
|
65 |
+
|
66 |
file.write(result)
|
67 |
+
return result, srt_path, " ".join(result_clean), json_output
|
68 |
+
|
69 |
|
70 |
def transcriber(file_input:gr.File,
|
71 |
file_type: str,
|