NeuralFalcon committed on
Commit
1531726
·
verified ·
1 Parent(s): 1e83001

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -0
app.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import nemo.collections.asr as nemo_asr
2
+ import torch
3
+ import gc
4
+ import os
5
+ import subprocess
6
+ from pathlib import Path
7
+ import gradio as gr
8
+ import shutil
9
+ from utils import *
10
+
11
def run_nemo_asr(mono_audio_path):
    """Transcribe one audio file with Parakeet-TDT and return its timestamps.

    The model is loaded for this call only and released again before
    returning, so it does not stay resident between requests.

    Args:
        mono_audio_path: Path of the audio file handed to the model's
            ``transcribe`` call.

    Returns:
        A ``(word_timestamps, segment_timestamps, char_timestamps)`` tuple
        read from the ``timestamp`` mapping of the first transcription
        result.
    """
    asr_model = nemo_asr.models.ASRModel.from_pretrained(
        model_name="nvidia/parakeet-tdt-0.6b-v2"
    )
    try:
        # With timestamps=True the result carries char, word and segment
        # level timestamps; only the first (and only) sample is used.
        output = asr_model.transcribe([mono_audio_path], timestamps=True)
        timestamps = output[0].timestamp
        return timestamps['word'], timestamps['segment'], timestamps['char']
    finally:
        # Release the model even when transcription fails, otherwise a
        # failed request would skip the cache cleanup below.
        del asr_model
        gc.collect()
        torch.cuda.empty_cache()
24
+
25
+
26
+
27
def process(file):
    """Create subtitle files and a transcript for an uploaded media file.

    ``.mp4`` / ``.mkv`` uploads first have their audio extracted to an
    ``.mp3`` with ffmpeg; every other upload is passed on as-is. The audio
    is then run through ``convert_to_mono``, ``run_nemo_asr`` and
    ``save_files``.

    Args:
        file: Uploaded file object from the Gradio ``File`` input; its
            ``name`` attribute is used as the path on disk.

    Returns:
        ``(default_srt, word_srt, shorts_srt, text_path, json_path,
        raw_text)`` as produced by ``save_files``.

    Raises:
        gr.Error: If the button was pressed without an uploaded file.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero
            status while extracting the audio track.
    """
    if file is None:
        raise gr.Error("Please upload an audio or video file first.")

    file_path = file.name
    file_ext = Path(file_path).suffix.lower()
    temp_paths = []  # intermediate files to delete once processing ends

    try:
        if file_ext in (".mp4", ".mkv"):
            # ffmpeg can choke on awkward file names, so work on a copy
            # that has a sanitized name.
            new_file_path = clean_file_name(file_path, unique_id=False)
            shutil.copy(file_path, new_file_path)
            temp_paths.append(new_file_path)

            # Swap only the real extension. A plain str.replace with the
            # lower-cased suffix misses "VIDEO.MP4" (making ffmpeg's output
            # equal to its input) and also rewrites any earlier ".mp4"
            # that happens to appear inside the path.
            audio_path = os.path.splitext(new_file_path)[0] + ".mp3"
            temp_paths.append(audio_path)

            # "-y" is a global option and belongs before the input/output
            # arguments; check=True surfaces extraction failures here
            # instead of as a confusing error further down the pipeline.
            subprocess.run(
                ["ffmpeg", "-y", "-i", new_file_path, audio_path],
                check=True,
            )
        else:
            audio_path = file_path

        mono_audio_path = convert_to_mono(audio_path)
        temp_paths.append(mono_audio_path)

        # Only the word-level timestamps are needed to build the outputs.
        word_timestamps, _segment_timestamps, _char_timestamps = run_nemo_asr(mono_audio_path)
        default_srt, word_srt, shorts_srt, text_path, json_path, raw_text = save_files(
            mono_audio_path, word_timestamps
        )
        return default_srt, word_srt, shorts_srt, text_path, json_path, raw_text
    finally:
        # Clean up every intermediate file, on success and on failure.
        for temp_path in temp_paths:
            if temp_path and os.path.exists(temp_path):
                os.remove(temp_path)
48
+
49
+
50
+
51
+
52
+ import click
53
# Command-line entry point: assembles the Gradio interface and launches it.
# (Documented with comments rather than a docstring so that click's
# generated --help text stays exactly as it was.)
# NOTE(review): the widget nesting below was reconstructed from a listing
# whose indentation had been flattened -- confirm the layout.
@click.command()
@click.option("--debug", is_flag=True, default=False, help="Enable debug mode.")
@click.option("--share", is_flag=True, default=False, help="Enable sharing of the interface.")
def main(debug, share):
    with gr.Blocks() as ui:
        gr.Markdown("<center><h1 style='font-size: 40px;'>Auto Subtitle Generator</h1></center>")
        gr.Markdown("Need to improve the SRT generation code.")

        with gr.Row():
            # Left column: upload widget and the trigger button.
            with gr.Column():
                media_input = gr.File(label="Upload Audio or Video File")
                with gr.Row():
                    run_button = gr.Button("🚀 Generate Subtitle", variant="primary")

            # Right column: generated downloads.
            with gr.Column():
                sentence_srt_out = gr.File(label="sentence Level SRT File")
                word_srt_out = gr.File(label="Word Level SRT File")

                with gr.Accordion("Others Format", open=False):
                    shorts_srt_out = gr.File(label="Subtitle For Vertical Video [Shorts or Reels]")
                    text_file_out = gr.File(label="Speech To Text File")
                    json_out = gr.File(label="Word Timestamp JSON")
                    transcript_out = gr.Text(label="Transcribed Text", lines=6)

        # Listed in the same order as the values returned by process().
        result_components = [
            sentence_srt_out,
            word_srt_out,
            shorts_srt_out,
            text_file_out,
            json_out,
            transcript_out,
        ]
        run_button.click(fn=process, inputs=[media_input], outputs=result_components)

    queued_ui = ui.queue()
    queued_ui.launch(debug=debug, share=share)


if __name__ == "__main__":
    main()