File size: 2,664 Bytes
bd4c34d
e1aa7a1
2dd8e14
 
bd4c34d
2dd8e14
bd4c34d
 
 
 
 
 
 
 
2dd8e14
bd4c34d
 
 
 
 
 
 
2dd8e14
bd4c34d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2dd8e14
bd4c34d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import os
from speechbrain.inference.separation import SepformerSeparation as separator
import torchaudio
import gradio as gr
from moviepy.editor import VideoFileClip

def convert_video_to_audio(video_input):
    """Extract the audio track of a video file to an AAC (.m4a) file.

    Args:
        video_input: Path to the input video file.

    Returns:
        The normalized path of the written ``.m4a`` audio file, placed
        alongside the input (same path, extension swapped).
    """
    video_clip = VideoFileClip(video_input)
    audio_clip = video_clip.audio
    # splitext keeps everything up to the last extension; the previous
    # split('.')[0] truncated at the FIRST dot, corrupting paths like
    # "/tmp/a.b/clip.mp4" or "my.video.mp4".
    base, _ = os.path.splitext(video_input)
    audio_clip_filepath = os.path.normpath(f"{base}.m4a")
    audio_clip.write_audiofile(audio_clip_filepath, codec='aac')
    # Release ffmpeg subprocess resources held by moviepy clips.
    audio_clip.close()
    video_clip.close()
    return audio_clip_filepath

def speechbrain(input_obj, input_obj_type):
    """Run speech enhancement/separation on an uploaded audio or video file.

    Args:
        input_obj: Filepath of the uploaded media.
        input_obj_type: Either ``"video"`` (audio is extracted first) or
            ``"audio"`` (the file is used directly).

    Returns:
        Path of the enhanced audio written as ``clean_audio_file.wav``.
    """
    if input_obj_type == "video":
        aud = convert_video_to_audio(input_obj)
    else:
        # Bug fix: previously `aud` was never assigned for audio inputs,
        # raising UnboundLocalError on the Audio tab.
        aud = input_obj
    # NOTE(review): `model` is not defined anywhere in this file — it must be
    # created at module level, e.g. via separator.from_hparams(...); confirm.
    est_sources = model.separate_file(path=aud)
    # First separated source, saved at the 8 kHz rate the model operates at.
    torchaudio.save("clean_audio_file.wav", est_sources[:, :, 0].detach().cpu(), 8000)
    return "clean_audio_file.wav"

def main():
    """Build and launch the two-tab (video/audio) Gradio enhancement demo."""
    with gr.Blocks(title="Speech Enhancement", delete_cache=(86400, 86400), theme=gr.themes.Base()) as demo:
        description = "Gradio demo for Speech Enhancement by SpeechBrain. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below."
        # Bug fix: the Github Repo anchor was missing its `target=` attribute
        # (bare '_blank'), producing invalid HTML.
        article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2010.13154' target='_blank'>Attention is All You Need in Speech Separation</a> | <a href='https://github.com/speechbrain/speechbrain/tree/develop/templates/enhancement' target='_blank'>Github Repo</a></p>"
        examples = [
            ['samples_audio_samples_test_mixture.wav']
        ]
        with gr.Tabs(selected="video") as tabs:
            with gr.Tab("Video", id="video"):
                gr.Interface(
                  fn=speechbrain,
                  inputs=[
                      gr.Video(type="filepath"),
                      # Hidden-ish radio telling speechbrain() which branch to take.
                      gr.Radio(choices=["video"], value="video", label="File Type")
                  ],  # Bug fix: missing comma here made the file a SyntaxError.
                  outputs=[
                    gr.Audio(label="Output Audio", type="filepath")
                  ],
                  description=description,
                  article=article,
                  examples=examples
                )
            with gr.Tab("Audio", id="audio"):
                gr.Interface(
                  fn=speechbrain,
                  inputs=[
                      gr.Audio(type="filepath"),
                      gr.Radio(choices=["audio"], value="audio", label="File Type")
                  ],  # Bug fix: same missing comma as the Video tab.
                  outputs=[
                    gr.Audio(label="Output Audio", type="filepath"),
                  ],
                  description=description,
                  article=article,
                  examples=examples
                )
    demo.launch()

if __name__ == '__main__':
    main()