Voice-Clone-Multilingual

Running

Sami

Adjust for ZeroGPU

802577d 4 months ago

2.05 kB

	import subprocess
	import spaces
	import os

	# Run the setup.py install command with the interpreter that is executing
	# this script. A bare 'python' is resolved through PATH and may point at a
	# different interpreter, in which case the install would land in an
	# environment this process does not import from.
	import sys

	try:
	    subprocess.run(
	        # Fall back to 'python' only if the running interpreter's path is
	        # unavailable (sys.executable can be empty).
	        [sys.executable or 'python', 'setup.py', 'install', '--user'],
	        check=True,
	    )
	except subprocess.CalledProcessError as e:
	    # Best effort: report the failure and let the script continue.
	    print(f"Installation failed with error: {e}")
	else:
	    print("Installation successful.")

	import gradio as gr
	import torch
	from TTS.api import TTS

	# Pick the compute device: CUDA when torch reports it, otherwise the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Module-level slot for the TTS model. It starts out empty; the model is
	# created later, inside the GPU-decorated initialize_tts().
	tts = None

	@spaces.GPU(duration=120)  # Voice cloning can take longer than default 60s
	def initialize_tts():
	    """Return the shared XTTS v2 model, creating it on first use.

	    While the module-level ``tts`` is still ``None`` the model is built,
	    moved to ``device`` and stored back into ``tts``. Once it is set, the
	    stored object is returned without building anything.
	    """
	    global tts
	    if tts is not None:
	        return tts
	    model = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
	    tts = model.to(device)
	    return tts

	@spaces.GPU(duration=120)
	def voice_clone(text: str, speaker_wav: str, language: str):
	    """Speak ``text`` in the voice of ``speaker_wav`` and return the WAV path.

	    Args:
	        text: Text to synthesize.
	        speaker_wav: Path to the reference audio file whose voice is cloned.
	        language: Language code handed to the TTS model unchanged.

	    Returns:
	        Path of the generated audio file, always ``outputs/output.wav``.

	    Raises:
	        gr.Error: If ``text`` is blank, or ``speaker_wav`` or ``language``
	            is missing.
	    """
	    global tts

	    # Reject missing inputs up front with a message the UI can display,
	    # instead of letting them reach the model and fail somewhere inside it.
	    if not text or not text.strip():
	        raise gr.Error("Please enter the text to synthesize.")
	    if not speaker_wav:
	        raise gr.Error("Please upload a voice sample.")
	    if not language:
	        raise gr.Error("Please select a language.")

	    # Initialize TTS if not already done
	    if tts is None:
	        tts = initialize_tts()

	    # Create output directory if it doesn't exist
	    os.makedirs("outputs", exist_ok=True)
	    output_path = os.path.join("outputs", "output.wav")

	    # Run TTS
	    print("Speaker wav:", speaker_wav)
	    tts.tts_to_file(
	        text=text,
	        speaker_wav=speaker_wav,
	        language=language,
	        file_path=output_path,
	    )
	    return output_path

	# Build the Gradio UI. Inputs: the text, a reference audio file and a
	# language code. Output: the generated audio file.
	language_choices = [
	    'ru', 'en', 'zh-cn', 'ja', 'de', 'fr', 'it', 'pt',
	    'pl', 'tr', 'ko', 'nl', 'cs', 'ar', 'es', 'hu',
	]
	input_components = [
	    gr.Textbox(lines=2, placeholder="Enter the text...", label="Text"),
	    gr.Audio(type="filepath", label="Upload audio file"),
	    gr.Radio(language_choices, label="language"),
	]
	output_component = gr.Audio(type="filepath", label="Generated audio file")

	iface = gr.Interface(
	    fn=voice_clone,
	    theme="Nymbo/Nymbo_Theme",
	    inputs=input_components,
	    outputs=output_component,
	    title="Voice Cloning",
	    description="Upload a voice sample and enter text to clone the voice. Processing may take 1-2 minutes.",
	)

	# Launch with queue enabled for better handling of GPU resources
	queued_app = iface.queue()
	queued_app.launch()