Spaces:
Sleeping
Sleeping
File size: 3,279 Bytes
67a6eaa 892eb78 b6e698b b22128f bbf5927 b22128f 94dc74b b6e698b bbf5927 b6e698b bbf5927 b6e698b bbf5927 b22128f bbf5927 b22128f bbf5927 b22128f bbf5927 b22128f bbf5927 b6e698b b22128f b6e698b f4b914d b6e698b b22128f a4dccbf 6e8f9db a4dccbf b6e698b b22128f a4dccbf b22128f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
# reference: https://huggingface.co/spaces/r3gm/Audio_separator
import gradio as gr
import shutil
import numpy as np
from pathlib import Path
import os
import time
import torch
from uvr_processing import process_uvr_task
def get_device_info():
    """Return a human-readable description of the active compute device."""
    # Prefer the first CUDA device when one is present; otherwise report CPU.
    has_cuda = torch.cuda.is_available()
    device = f"GPU ({torch.cuda.get_device_name(0)})" if has_cuda else "CPU"
    return f"当前运行环境: {device}"
def inference(audio_file: str,
              stem: str = "vocal",) -> list[str]:
    """Separate an audio file into background and vocal stems.

    Args:
        audio_file: Filesystem path to the input audio file.
        stem: Requested stem, "vocal" or "background".
            NOTE(review): currently only validated, never used —
            process_uvr_task always produces both stems; confirm whether
            stem selection should filter the returned files.

    Returns:
        List of two path strings: [background_path, vocals_path].

    Raises:
        ValueError: If ``audio_file`` or ``stem`` is empty/None.
    """
    if not audio_file:
        raise ValueError("The audio path is missing.")
    if not stem:
        raise ValueError("Please select 'vocal' or 'background' stem.")

    # Model weights and outputs live in fixed directories next to the app.
    background_path, vocals_path = process_uvr_task(
        mdxnet_models_dir=Path("./mdx_models"),
        input_file_path=Path(audio_file),
        output_dir=Path("./output"),
    )
    return [str(background_path), str(vocals_path)]
def get_gui(theme, title, description):
    """Assemble the Gradio Blocks UI for the separator.

    Args:
        theme: Gradio theme identifier or object.
        title: Markdown string rendered as the page title.
        description: Markdown string rendered below the title.

    Returns:
        The constructed ``gr.Blocks`` application (not launched).
    """
    with gr.Blocks(theme=theme) as app:
        # Header: title, description, then the runtime device report.
        for header_md in (title, description, get_device_info()):
            gr.Markdown(header_md)

        # Input / trigger / output widgets.
        audio_input = gr.Audio(label="Audio file", type="filepath")  # type: str | Path | bytes | tuple[int, np.ndarray] | None
        download_button = gr.Button("Inference")
        file_output = gr.File(label="Result", file_count="multiple", interactive=False)

        # Button click runs the separation and lists the resulting files.
        download_button.click(inference, inputs=[audio_input], outputs=[file_output])

    return app
if __name__ == "__main__":
    # Fixed: removed a stray trailing "|" scrape artifact after app.launch(),
    # unused BASE_DIR/mdxnet_models_dir/output_dir locals (inference builds
    # its own paths), and commented-out client-probe code.
    title = "<center><strong><font size='7'>Vocal BGM Separator</font></strong></center>"
    description = "This demo uses the MDX-Net models to perform Ultimate Vocal Remover (uvr) task for vocal and background sound separation."
    theme = "NoCrypt/miku"

    # GUI tab — remote clients call: predict(audio_file, api_name="/inference")
    app_gui = get_gui(theme, title, description)

    # API tab — remote clients call: predict(audio_file, api_name="/predict")
    app_api = gr.Interface(
        fn=inference,
        inputs=gr.Audio(type="filepath"),
        outputs=gr.File(file_count="multiple"),
    )

    app = gr.TabbedInterface(
        interface_list=[app_gui, app_api],
        tab_names=["GUI", "API"],
    )
    app.queue(default_concurrency_limit=40)
    app.launch()