File size: 3,279 Bytes
67a6eaa
892eb78
b6e698b
 
 
b22128f
 
 
bbf5927
b22128f
 
 
 
 
 
 
 
94dc74b
b6e698b
bbf5927
 
b6e698b
 
bbf5927
 
b6e698b
bbf5927
 
b22128f
bbf5927
 
 
b22128f
bbf5927
b22128f
bbf5927
 
 
 
 
 
 
 
 
 
b22128f
bbf5927
b6e698b
 
 
 
 
 
 
b22128f
b6e698b
 
 
 
 
 
 
 
 
 
 
 
 
f4b914d
 
b6e698b
 
 
b22128f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a4dccbf
 
6e8f9db
 
a4dccbf
b6e698b
b22128f
 
 
 
a4dccbf
b22128f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# reference: https://huggingface.co/spaces/r3gm/Audio_separator
import gradio as gr
import shutil
import numpy as np
from pathlib import Path
import os
import time
import torch
from uvr_processing import process_uvr_task


def get_device_info():
    """Return a human-readable label for the compute device torch will use."""
    # Prefer the first CUDA device when one is visible; otherwise report CPU.
    device_label = (
        f"GPU ({torch.cuda.get_device_name(0)})"
        if torch.cuda.is_available()
        else "CPU"
    )
    return f"当前运行环境: {device_label}"


def inference(audio_file: str,
              stem: str = "vocal",) -> list[str]:
    """Separate an audio file into background and vocal stems.

    Args:
        audio_file: Filesystem path to the input audio file (as provided
            by the Gradio ``filepath``-typed Audio component).
        stem: Requested stem, "vocal" or "background".
            NOTE(review): currently only validated for non-emptiness and
            never forwarded to ``process_uvr_task`` — both stems are
            always produced; confirm whether it should be passed through.

    Returns:
        ``[background_path, vocals_path]`` as plain strings, the shape
        expected by the multi-file ``gr.File`` output.

    Raises:
        ValueError: If ``audio_file`` is empty/missing or ``stem`` is empty.
    """
    if not audio_file:
        raise ValueError("The audio path is missing.")

    if not stem:
        raise ValueError("Please select 'vocal' or 'background' stem.")

    # Model and output locations are resolved relative to the working dir.
    background_path, vocals_path = process_uvr_task(
        mdxnet_models_dir=Path("./mdx_models"),
        input_file_path=Path(audio_file),
        output_dir=Path("./output"),
        )

    return [str(background_path), str(vocals_path)]


def get_gui(theme, title, description):
    """Assemble the Blocks-based GUI.

    Layout: title/description/device-info markdown, a filepath Audio
    input, an "Inference" button, and a multi-file result output wired
    to ``inference``. Returns the (unlaunched) Blocks app.
    """
    with gr.Blocks(theme=theme) as app:
        # Header section.
        gr.Markdown(title)
        gr.Markdown(description)
        gr.Markdown(get_device_info())

        # ``type="filepath"`` hands ``inference`` a path string.
        audio_input = gr.Audio(label="Audio file", type="filepath")
        run_button = gr.Button("Inference")
        result_files = gr.File(label="Result", file_count="multiple", interactive=False)

        run_button.click(
            inference,
            inputs=[audio_input],
            outputs=[result_files],
        )

    return app


if __name__ == "__main__":
    # UI copy shown at the top of the app.
    title = "<center><strong><font size='7'>Vocal BGM Separator</font></strong></center>"
    description = "This demo uses the MDX-Net models to perform Ultimate Vocal Remover (uvr) task for vocal and background sound separation."
    theme = "NoCrypt/miku"

    BASE_DIR = "."  # os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    # NOTE(review): these two paths are built but never used below —
    # ``inference`` hardcodes its own "./mdx_models" / "./output" paths.
    # Confirm whether they were meant to be passed in.
    mdxnet_models_dir = os.path.join(BASE_DIR, "mdx_models")
    output_dir = os.path.join(BASE_DIR, "output")

    # confirm entry points from client
    # client_local = Client("http://127.0.0.1:7860")
    # client = Client(f"{HF_USERNAME}/{HF_SPACENAME}", hf_token=HF_TOKEN)
    # client_local.view_api()

    # entry point for GUI
    # predict(audio_file, api_name="/inference") -> result
    app_gui = get_gui(theme, title, description)

    # entry point for API
    # predict(audio_file, api_name="/predict") -> output
    # Minimal Interface wrapper exposing ``inference`` as an API endpoint;
    # ``stem`` falls back to its default since only audio is wired in.
    app_api = gr.Interface(
        fn=inference,
        inputs=gr.Audio(type="filepath"),
        outputs=gr.File(file_count="multiple"),
    )

    # Present GUI and API variants as two tabs of one app.
    app = gr.TabbedInterface(
        interface_list=[app_gui, app_api],
        tab_names=["GUI", "API"]
    )

    # Enable request queueing, then start the server (blocks here).
    app.queue(default_concurrency_limit=40)
    app.launch()