Spaces:

marquesafonso
/

audio2waveform

Running

File size: 5,360 Bytes

import subprocess
import gradio as gr
import tempfile
from pathlib import Path
import os
from moviepy.editor import VideoFileClip

def convert_video_to_audio(video_input):
    """Extract the audio track of a video file into an ``.m4a`` file.

    The audio file is written next to the input, using the input path with
    its final extension replaced by ``.m4a``.

    Args:
        video_input: Path of the video file to read.

    Returns:
        The normalized path of the AAC-encoded audio file that was written.

    Raises:
        ValueError: If the video has no audio track.
    """
    # splitext strips only the final extension. Splitting on the first "."
    # truncated any path containing a dot in a directory or file name
    # (e.g. "/tmp/run.1/clip.mp4" became "/tmp/run.m4a").
    path_without_ext, _ = os.path.splitext(video_input)
    audio_clip_filepath = os.path.normpath(f"{path_without_ext}.m4a")

    video_clip = VideoFileClip(video_input)
    try:
        audio_clip = video_clip.audio
        if audio_clip is None:
            raise ValueError(f"No audio track found in video: {video_input}")
        try:
            audio_clip.write_audiofile(audio_clip_filepath, codec='aac')
        finally:
            audio_clip.close()
    finally:
        # Always release the clip, even when encoding fails.
        video_clip.close()
    return audio_clip_filepath


# Startup diagnostic: print, at import time, whether os.access reports each of
# these directories as writable for the current process.
print("Checking directory and file permissions...")
for path in ('/tmp', '/usr/share/fonts', '/root/.config', '/src/fonts', '/src/flagged'):
    status = (
        f'Directory {path} is writable.'
        if os.access(path, os.W_OK)
        else f'Error: Directory {path} is not writable.'
    )
    print(status)

class Predictor:
    """Render an audio file as a waveform video, optionally with a caption."""

    @staticmethod
    def _escape_caption(caption_text: str) -> str:
        """Return caption text that ImageMagick will not treat as a file reference.

        ImageMagick's ``caption:`` coder reads the text from a file when the
        string starts with ``@``. The caption comes from a user-facing text
        box, so a leading ``@`` is backslash-escaped to keep it literal.
        """
        if caption_text.startswith('@'):
            return '\\' + caption_text
        return caption_text

    @staticmethod
    def _reserve_temp_path(suffix: str) -> str:
        """Create an empty temporary file and return its path.

        Unlike ``tempfile.mktemp``, the file exists as soon as the name is
        chosen, so another process cannot claim the same name in between.
        """
        fd, path = tempfile.mkstemp(suffix=suffix)
        os.close(fd)
        return path

    @staticmethod
    def _discard(path: str) -> None:
        """Best-effort removal of a temporary file."""
        try:
            os.remove(path)
        except OSError:
            # Cleanup must never mask the real result or error.
            pass

    def predict(self,
        audio: str,
        bg_color: str = "#000000",
        fg_alpha: float = 0.75,
        bars_color: str = "#ffffff",
        bar_count: int = 100,
        bar_width: float = 0.4,
        caption_text: str = "",
    ) -> str:
        """Make waveform video from audio file.

        Args:
            audio: Path of the audio file passed to ``gr.make_waveform``.
            bg_color: Background color, forwarded to ``gr.make_waveform`` and
                reused for the padding and caption background.
            fg_alpha: Forwarded to ``gr.make_waveform``.
            bars_color: Forwarded to ``gr.make_waveform``; also the caption
                text fill color.
            bar_count: Forwarded to ``gr.make_waveform``.
            bar_width: Forwarded to ``gr.make_waveform``.
            caption_text: Optional caption. When empty or ``None`` the plain
                waveform video is returned.

        Returns:
            Path of the resulting video file.

        Raises:
            subprocess.CalledProcessError: If ffmpeg or ImageMagick fails.
        """
        waveform_video = gr.make_waveform(
            audio,
            bg_color=bg_color,
            fg_alpha=fg_alpha,
            bars_color=bars_color,
            bar_count=bar_count,
            bar_width=bar_width,
        )

        if not caption_text:
            return waveform_video

        padded_waveform_path = Predictor._reserve_temp_path(".mp4")
        background_image_path = Predictor._reserve_temp_path(".png")
        final_video_path = Predictor._reserve_temp_path(".mp4")

        try:
            # Pad the video to 1000x667 with the waveform placed at y=467,
            # leaving the area above it free for the caption image.
            # bg_color[1:] drops the first character (presumably the "#" of
            # a "#rrggbb" value -- confirm against the UI input).
            print("Running ffmpeg to add padding...")
            subprocess.run([
                'ffmpeg', '-y', '-i', waveform_video, '-vf',
                f'pad=width=1000:height=667:x=0:y=467:color={bg_color[1:]}',
                padded_waveform_path
            ], check=True)

            # Render the caption as a 900x367 image plus a 40px border.
            print("Creating an image using ImageMagick...")
            safe_caption = Predictor._escape_caption(caption_text)
            subprocess.run([
                'convert', '-background', bg_color, '-fill', bars_color, '-font', '/src/fonts/Roboto-Black.ttf',
                '-pointsize', '48', '-size', '900x367', '-gravity', 'center', f'caption:{safe_caption}',
                '-bordercolor', bg_color, '-border', '40', background_image_path
            ], check=True)

            print("Overlaying the image on the padded waveform video...")
            subprocess.run([
                'ffmpeg', '-y', '-i', padded_waveform_path, '-i', background_image_path,
                '-filter_complex', 'overlay=0:0', final_video_path
            ], check=True)
        except Exception:
            # Nothing will be returned, so do not leave the output file behind.
            Predictor._discard(final_video_path)
            raise
        finally:
            # Intermediates are never needed after this call.
            Predictor._discard(padded_waveform_path)
            Predictor._discard(background_image_path)

        return final_video_path

# Gradio user interface
def gradio_predict(input_obj, input_obj_type, bg_color, fg_alpha, bars_color, bar_count, bar_width, caption_text):
    """Gradio callback: turn the submitted media file into a waveform video.

    When ``input_obj_type`` is ``"video"`` the audio track is first extracted
    with ``convert_video_to_audio``; any other value passes ``input_obj``
    through as the audio path. The remaining arguments are forwarded
    unchanged to ``Predictor.predict``, whose result is returned.
    """
    audio_path = (
        convert_video_to_audio(video_input=input_obj)
        if input_obj_type == "video"
        else input_obj
    )
    return Predictor().predict(
        audio=audio_path,
        bg_color=bg_color,
        fg_alpha=fg_alpha,
        bars_color=bars_color,
        bar_count=bar_count,
        bar_width=bar_width,
        caption_text=caption_text,
    )

def main():
    """Build the two-tab (video / audio) Gradio app and launch it."""

    def add_interface(media_input, kind):
        # Both tabs share the same controls; only the media input and the
        # fixed "File Type" value differ. The media component is created by
        # the caller so it is still instantiated before the other inputs.
        controls = [
            media_input,
            gr.Radio(choices=[kind], value=kind, label="File Type"),
            gr.Textbox(value="#000000", label="Background Color"),
            gr.Slider(0, 1, value=0.75, label="Foreground Opacity"),
            gr.ColorPicker(value="#ffffff", label="Bars Color"),
            gr.Slider(10, 500, value=100, step=1, label="Number of Bars"),
            gr.Slider(0, 1, value=0.4, step=0.1, label="Bar Width"),
            gr.Textbox(value="", label="Caption Text"),
        ]
        gr.Interface(
            fn=gradio_predict,
            inputs=controls,
            outputs=gr.Video(label="Waveform Video"),
            live=False,
            allow_flagging="never",
        )

    with gr.Blocks(title="audio2waveform", delete_cache=(86400, 86400)) as demo:
        with gr.Tabs(selected="video"):
            with gr.Tab("Video", id="video"):
                add_interface(gr.Video(source="upload", label="Video File"), "video")
            with gr.Tab("Audio", id="audio"):
                add_interface(
                    gr.Audio(source="upload", type="filepath", label="Audio File"),
                    "audio",
                )
        demo.launch()

if __name__ == "__main__":
    main()