|
import subprocess |
|
import spaces |
|
import os |
|
|
|
|
|
# Best-effort install of the local package via setup.py. This runs before the
# third-party imports that follow; a failure is reported on stdout and the
# script carries on.
import sys

try:
    # Launch the interpreter that is running this script rather than whatever
    # "python" resolves to on PATH, so the install targets the active
    # environment and does not depend on a "python" executable existing.
    subprocess.run(
        [sys.executable, "setup.py", "install", "--user"],
        check=True,
    )
except (subprocess.CalledProcessError, OSError) as e:
    # CalledProcessError: setup.py ran but exited with a non-zero status.
    # OSError: the interpreter could not be launched at all.
    print(f"Installation failed with error: {e}")
else:
    print("Installation successful.")
|
|
|
import gradio as gr |
|
import torch |
|
from TTS.api import TTS |
|
|
|
|
|
# Run on the GPU when torch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Shared TTS model instance. Starts empty and is filled in on first use by
# initialize_tts().
tts = None
|
|
|
@spaces.GPU(duration=120)
def initialize_tts():
    """Return the shared XTTS v2 model, creating it on the first call.

    The model is kept in the module-level ``tts`` global, so later calls
    return the already-created instance instead of building a new one.

    Returns:
        The TTS model instance, moved to the module-level ``device``.
    """
    global tts

    # Already created: hand back the existing instance.
    if tts is not None:
        return tts

    model = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
    tts = model.to(device)
    return tts
|
|
|
@spaces.GPU(duration=120)
def voice_clone(text: str, speaker_wav: str, language: str):
    """Speak ``text`` in the voice from ``speaker_wav`` and return the WAV path.

    Args:
        text: The text to synthesize.
        speaker_wav: Filesystem path of the reference voice recording.
        language: Language code passed through to the TTS model.

    Returns:
        Path of the generated WAV file inside the ``outputs`` directory.

    Raises:
        gr.Error: If ``text`` is blank, or no reference audio or language
            was supplied.
    """
    global tts

    import uuid  # local import: only used to name the output file

    # Reject incomplete submissions before any model work. gr.Error is used
    # so the message is shown to the user in the Gradio interface.
    if not text or not text.strip():
        raise gr.Error("Please enter the text to synthesize.")
    if not speaker_wav:
        raise gr.Error("Please upload a voice sample.")
    if not language:
        raise gr.Error("Please select a language.")

    if tts is None:
        tts = initialize_tts()

    os.makedirs("outputs", exist_ok=True)
    # Use a fresh file name for every call: with a single fixed name,
    # overlapping requests would overwrite each other's generated audio.
    output_path = os.path.join("outputs", f"output_{uuid.uuid4().hex}.wav")

    print("Speaker wav:", speaker_wav)
    tts.tts_to_file(
        text=text,
        speaker_wav=speaker_wav,
        language=language,
        file_path=output_path,
    )
    return output_path
|
|
|
|
|
# Language options offered in the radio selector; the chosen value is passed
# to voice_clone as its ``language`` argument.
_language_choices = [
    'ru', 'en', 'zh-cn', 'ja', 'de', 'fr', 'it', 'pt',
    'pl', 'tr', 'ko', 'nl', 'cs', 'ar', 'es', 'hu',
]

# Input components, in the positional order voice_clone expects:
# text, speaker_wav, language.
_text_box = gr.Textbox(lines=2, placeholder="Enter the text...", label="Text")
_speaker_audio = gr.Audio(type="filepath", label="Upload audio file")
_language_radio = gr.Radio(_language_choices, label="language")

# Output component: receives the file path returned by voice_clone.
_result_audio = gr.Audio(type="filepath", label="Generated audio file")

iface = gr.Interface(
    fn=voice_clone,
    theme="Nymbo/Nymbo_Theme",
    inputs=[_text_box, _speaker_audio, _language_radio],
    outputs=_result_audio,
    title="Voice Cloning",
    description="Upload a voice sample and enter text to clone the voice. Processing may take 1-2 minutes.",
)

# Enable the request queue, then start the app.
queued_app = iface.queue()
queued_app.launch()