File size: 2,664 Bytes
bd4c34d
e1aa7a1
2dd8e14
 
bd4c34d
2dd8e14
bd4c34d
 
 
 
 
 
 
 
2dd8e14
bd4c34d
 
 
 
 
 
 
2dd8e14
bd4c34d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2dd8e14
bd4c34d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import os
from speechbrain.inference.separation import SepformerSeparation as separator
import torchaudio
import gradio as gr
from moviepy.editor import VideoFileClip

def convert_video_to_audio(video_input):
    """Extract the audio track of a video file to an AAC (.m4a) file.

    Args:
        video_input: Path to the input video file.

    Returns:
        The normalized path of the written ``.m4a`` audio file, placed
        alongside the input (same path, extension swapped).
    """
    video_clip = VideoFileClip(video_input)
    audio_clip = video_clip.audio
    # splitext keeps everything up to the last extension; the previous
    # split('.')[0] truncated at the FIRST dot, corrupting paths like
    # "/tmp/a.b/clip.mp4" or "my.video.mp4".
    base, _ = os.path.splitext(video_input)
    audio_clip_filepath = os.path.normpath(f"{base}.m4a")
    audio_clip.write_audiofile(audio_clip_filepath, codec='aac')
    # Release ffmpeg subprocess resources held by moviepy clips.
    audio_clip.close()
    video_clip.close()
    return audio_clip_filepath

def speechbrain(input_obj, input_obj_type):
    """Run speech enhancement/separation on an uploaded audio or video file.

    Args:
        input_obj: Filepath of the uploaded media.
        input_obj_type: Either ``"video"`` (audio is extracted first) or
            ``"audio"`` (the file is used directly).

    Returns:
        Path of the enhanced audio written as ``clean_audio_file.wav``.
    """
    if input_obj_type == "video":
        aud = convert_video_to_audio(input_obj)
    else:
        # Bug fix: previously `aud` was never assigned for audio inputs,
        # raising UnboundLocalError on the Audio tab.
        aud = input_obj
    # NOTE(review): `model` is not defined anywhere in this file — it must be
    # created at module level, e.g. via separator.from_hparams(...); confirm.
    est_sources = model.separate_file(path=aud)
    # First separated source, saved at the 8 kHz rate the model operates at.
    torchaudio.save("clean_audio_file.wav", est_sources[:, :, 0].detach().cpu(), 8000)
    return "clean_audio_file.wav"

def main():
    """Build and launch the two-tab (video/audio) Gradio enhancement demo."""
    with gr.Blocks(title="Speech Enhancement", delete_cache=(86400, 86400), theme=gr.themes.Base()) as demo:
        description = "Gradio demo for Speech Enhancement by SpeechBrain. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below."
        # Bug fix: the Github Repo anchor was missing its `target=` attribute
        # (bare '_blank'), producing invalid HTML.
        article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2010.13154' target='_blank'>Attention is All You Need in Speech Separation</a> | <a href='https://github.com/speechbrain/speechbrain/tree/develop/templates/enhancement' target='_blank'>Github Repo</a></p>"
        examples = [
            ['samples_audio_samples_test_mixture.wav']
        ]
        with gr.Tabs(selected="video") as tabs:
            with gr.Tab("Video", id="video"):
                gr.Interface(
                  fn=speechbrain,
                  inputs=[
                      gr.Video(type="filepath"),
                      # Hidden-ish radio telling speechbrain() which branch to take.
                      gr.Radio(choices=["video"], value="video", label="File Type")
                  ],  # Bug fix: missing comma here made the file a SyntaxError.
                  outputs=[
                    gr.Audio(label="Output Audio", type="filepath")
                  ],
                  description=description,
                  article=article,
                  examples=examples
                )
            with gr.Tab("Audio", id="audio"):
                gr.Interface(
                  fn=speechbrain,
                  inputs=[
                      gr.Audio(type="filepath"),
                      gr.Radio(choices=["audio"], value="audio", label="File Type")
                  ],  # Bug fix: same missing comma as the Video tab.
                  outputs=[
                    gr.Audio(label="Output Audio", type="filepath"),
                  ],
                  description=description,
                  article=article,
                  examples=examples
                )
    demo.launch()

if __name__ == '__main__':
    main()