ECHOAI

Running on Zero

App Files Files Community

ECHOAI / webui.py

MPCIRCLE

Update webui.py

87a9d13 verified 15 days ago

raw

history blame contribute delete

2.64 kB

	import spaces
	import os
	import shutil
	import threading
	import time
	import sys

	from huggingface_hub import snapshot_download

	# Directory containing this script; it and its "indextts" subfolder are put
	# on the import path so the project imports that follow can resolve.
	current_dir = os.path.dirname(os.path.abspath(__file__))
	sys.path.extend([current_dir, os.path.join(current_dir, "indextts")])

	import gradio as gr
	from indextts.infer import IndexTTS
	from tools.i18n.i18n import I18nAuto

	# Translation helper, configured for the zh_CN locale.
	i18n = I18nAuto(language="zh_CN")
	MODE = "local"

	# The checkpoint files must be on disk before the model is constructed, so
	# the download has to stay ahead of the IndexTTS(...) call.
	snapshot_download(
	    "IndexTeam/IndexTTS-1.5",
	    local_dir="checkpoints",
	)
	tts = IndexTTS(
	    model_dir="checkpoints",
	    cfg_path="checkpoints/config.yaml",
	)

	# Working directories: synthesized audio goes under "outputs".
	os.makedirs("outputs/tasks", exist_ok=True)
	os.makedirs("prompts", exist_ok=True)

	@spaces.GPU
	def infer(voice, text, output_path=None):
	    """Synthesize ``text`` using ``voice`` as the reference audio.

	    Args:
	        voice: Reference audio handed to ``tts.infer``. The UI passes the
	            file path produced by the ``prompt_audio`` component.
	        text: Target text to synthesize.
	        output_path: Destination wav path. When falsy, a fresh file name
	            under ``outputs/`` is generated.

	    Returns:
	        The path that was passed to ``tts.infer`` as the output location.

	    Raises:
	        Exception: If the module-level ``tts`` object is falsy.
	    """
	    # Function-scope import keeps this block self-contained.
	    import uuid

	    if not tts:
	        raise Exception("Model not loaded")
	    if not output_path:
	        # A whole-second timestamp alone collides when two requests arrive
	        # within the same second, so one result would overwrite the other.
	        # A random suffix keeps every generated file distinct.
	        unique = uuid.uuid4().hex[:8]
	        output_path = os.path.join(
	            "outputs", f"spk_{int(time.time())}_{unique}.wav"
	        )
	    tts.infer(voice, text, output_path)
	    return output_path

	def gen_single(prompt, text):
	    """Click handler for the generate button.

	    Args:
	        prompt: File path of the uploaded/recorded reference audio, or a
	            falsy value when nothing has been provided.
	        text: Target text typed by the user.

	    Returns:
	        A ``gr.update`` that points the output audio component at the
	        generated file and makes it visible.

	    Raises:
	        gr.Error: If the reference audio or the target text is missing.
	    """
	    # Reject incomplete input up front so the user gets a readable message
	    # instead of whatever the model call would raise on a missing value.
	    if not prompt:
	        raise gr.Error("Please upload reference audio")
	    if not text or not text.strip():
	        raise gr.Error("Please enter target text")

	    output_path = infer(prompt, text)
	    return gr.update(value=output_path, visible=True)

	def update_prompt_audio():
	    """Return an update that marks its output component as interactive.

	    Registered as the ``prompt_audio.upload`` callback with ``gen_button``
	    as the only output, so it enables the generate button after an upload.
	    """
	    return gr.update(interactive=True)


	# Gradio UI: one tab with reference-audio input, target text, a generate
	# button and the (initially hidden) result player.
	with gr.Blocks() as demo:
	    # Page banner; the markup string is kept exactly as authored.
	    gr.HTML('''
	<h2><center>Echo AI : High-Fidelity,Controllable, and Zero-Shot Text-to-Speech and voice cloning for the Real World</center></h2>

	<p align="center">
	<a href='https://arxiv.org/abs/2502.05512'><img src='https://img.shields.io/badge/ArXiv-2502.05512-red'></a>

	''')
	    with gr.Tab("Voice cloning and audio generation"):
	        # NOTE(review): all four widgets are assumed to share this Row —
	        # confirm against the intended layout.
	        with gr.Row():
	            prompt_audio = gr.Audio(
	                label="Please upload reference audio",
	                key="prompt_audio",
	                sources=["upload", "microphone"],
	                type="filepath",
	            )
	            input_text_single = gr.Textbox(
	                label="Please enter target text",
	                key="input_text_single",
	            )
	            gen_button = gr.Button(
	                "Generate speech",
	                key="gen_button",
	                interactive=True,
	            )
	            # Hidden until gen_single returns an update with visible=True.
	            output_audio = gr.Audio(
	                label="Generate results",
	                visible=False,
	                key="output_audio",
	            )

	    # Event wiring must happen inside the Blocks context.
	    prompt_audio.upload(
	        update_prompt_audio,
	        inputs=[],
	        outputs=[gen_button],
	    )

	    gen_button.click(
	        gen_single,
	        inputs=[prompt_audio, input_text_single],
	        outputs=[output_audio],
	    )


	def main():
	    """Prepare the model's normalizer, enable queuing and start the web UI."""
	    tts.load_normalizer()
	    # NOTE(review): 20 is passed positionally; which queue() parameter it
	    # binds to depends on the installed Gradio version — confirm intent.
	    demo.queue(20)
	    # Listen on all interfaces and request a public share link.
	    demo.launch(share=True, server_name="0.0.0.0")


	if __name__ == "__main__":
	    main()