Spaces:
Runtime error
Runtime error
| import io | |
| import os | |
| import tempfile | |
| from typing import List | |
| import TTS.api | |
| import torch | |
| from pydub import AudioSegment | |
| import gradio as gr # Gradio库 | |
| import config | |
# Run inference on the GPU when torch reports one, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Instantiate every entry of ``config.models`` once, at import time, and move
# it to the selected device.  Each value of ``config.models`` is handed to
# ``TTS.api.TTS`` as its first argument; the resulting objects are keyed by
# the same ids used in the config.  A comprehension is used so that no loop
# variable (previously named ``id``) shadows a builtin at module scope.
models = {
    model_id: TTS.api.TTS(model_spec).to(device)
    for model_id, model_spec in config.models.items()
}
def _load_speaker_audio(upload):
    """Decode one uploaded reference recording into a pydub ``AudioSegment``.

    Accepts raw bytes, a filesystem path, or a readable file-like object, so
    the handler works regardless of which of these forms the upload arrives in.
    """
    if isinstance(upload, (bytes, bytearray)):
        return AudioSegment.from_file(io.BytesIO(upload))
    if isinstance(upload, (str, os.PathLike)):
        return AudioSegment.from_file(upload)
    # File-like object: read it fully and decode from memory.
    return AudioSegment.from_file(io.BytesIO(upload.read()))


def synthesize_tts(
    text: str = 'Hello, World!',
    speaker_wavs: List[gr.File] = None,
    speaker_idx: str = 'Ana Florence',
    language: str = 'ja',
    temperature: float = 0.65,
    length_penalty: float = 1.0,
    repetition_penalty: float = 2.0,
    top_k: int = 50,
    top_p: float = 0.8,
    speed: float = 1.0,
    enable_text_splitting: bool = True,
):
    """Synthesize ``text`` with ``models['multi']`` and return the audio bytes.

    If ``speaker_wavs`` contains uploads, each one is converted to a temporary
    WAV file and the list of those files is passed to the model as
    ``speaker_wav``.  Otherwise the model is called with
    ``speaker=speaker_idx``.  The temporary files are removed before returning,
    whether synthesis succeeded or not.

    Args:
        text: Text to speak.
        speaker_wavs: Optional reference recordings (a list, or a single
            upload); each item may be bytes, a path, or a file-like object.
        speaker_idx: Speaker name used when no recordings are supplied.
        language: Language code forwarded to the model.
        temperature, length_penalty, repetition_penalty, top_k, top_p, speed,
        enable_text_splitting: Forwarded unchanged to ``tts_to_file``.

    Returns:
        bytes: Whatever ``tts_to_file`` wrote into the in-memory output buffer.

    Raises:
        gr.Error: If an uploaded recording cannot be decoded or converted.
            (Previously an error *string* was returned, which an audio output
            component cannot display.)
    """
    if not speaker_wavs:
        uploads = []
    elif isinstance(speaker_wavs, (list, tuple)):
        uploads = speaker_wavs
    else:
        # A single upload was supplied instead of a list.
        uploads = [speaker_wavs]

    temp_files = []
    try:
        for upload in uploads:
            try:
                audio = _load_speaker_audio(upload)
                with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                    # Register the path before writing so the ``finally``
                    # below removes it even if the export fails midway.
                    temp_files.append(tmp.name)
                    audio.export(tmp, format="wav")
            except Exception as e:
                raise gr.Error(f"Error processing audio file: {e}") from e

        # Reference recordings take precedence over the named speaker.
        if temp_files:
            speaker_kwargs = {"speaker_wav": temp_files}
        else:
            speaker_kwargs = {"speaker": speaker_idx}

        output_buffer = io.BytesIO()
        models['multi'].tts_to_file(
            text=text,
            language=language,
            file_path=output_buffer,
            temperature=temperature,
            length_penalty=length_penalty,
            repetition_penalty=repetition_penalty,
            top_k=top_k,
            top_p=top_p,
            speed=speed,
            enable_text_splitting=enable_text_splitting,
            **speaker_kwargs,
        )
        return output_buffer.getvalue()
    finally:
        for temp_file in temp_files:
            try:
                os.remove(temp_file)
            except FileNotFoundError:
                # Already gone; nothing left to clean up.
                pass
# Build the Gradio interface.  The input components are listed in the same
# order as the parameters of ``synthesize_tts`` because ``gr.Interface``
# passes their values positionally.
inputs = [
    gr.Textbox(value="Hello, World!", label="Text to Synthesize"),
    # ``file_count="multiple"`` is the keyword that enables multi-file upload;
    # the previous ``optional=True, multiple=True`` are not ``gr.File``
    # parameters.
    gr.File(
        file_types=["audio"],
        label="Speaker WAV files (optional)",
        file_count="multiple",
    ),
    gr.Textbox(value="Ana Florence", label="Speaker Index"),
    gr.Textbox(value="ja", label="Language"),
    gr.Slider(0, 1, value=0.65, step=0.01, label="Temperature"),
    gr.Slider(0.5, 2, value=1.0, step=0.1, label="Length Penalty"),
    gr.Slider(1, 10, value=2.0, step=0.1, label="Repetition Penalty"),
    gr.Slider(1, 100, value=50, step=1, label="Top-K"),
    gr.Slider(0, 1, value=0.8, step=0.01, label="Top-P"),
    gr.Slider(0.5, 2, value=1.0, step=0.01, label="Speed"),
    gr.Checkbox(value=True, label="Enable Text Splitting"),
]
outputs = gr.Audio(label="Generated Speech")

# Launch immediately at module level, as the original script did.
demo = gr.Interface(
    fn=synthesize_tts,
    inputs=inputs,
    outputs=outputs,
    title="Text-to-Speech Synthesis with Gradio",
)
demo.launch()