# Spaces: Paused
import io
import json
import logging
import threading

import outetts
from fastapi import FastAPI, Request
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import JSONResponse, StreamingResponse
# Model configuration: the VERSION_1_0_SIZE_1B model on the transformers
# backend (outetts.Backend.HF).
# To use the llama.cpp backend instead, pass
# backend=outetts.Backend.LLAMACPP together with
# quantization=outetts.LlamaCppQuantization.FP16.
_model_config = outetts.ModelConfig.auto_config(
    model=outetts.Models.VERSION_1_0_SIZE_1B,
    backend=outetts.Backend.HF,
)

# Shared TTS interface, created once at import time and reused by every request.
interface = outetts.Interface(config=_model_config)

# Default speaker profile used for all generated audio.
speaker = interface.load_default_speaker("EN-FEMALE-1-NEUTRAL")

# The ASGI application object.
app = FastAPI()
def greet_json():
    """Return the static hello-world JSON payload."""
    # NOTE(review): no route decorator is visible on this handler — confirm
    # how (and under which path) it is registered on the application.
    payload = {"Hello": "World!"}
    return payload
_logger = logging.getLogger(__name__)

# Size of each chunk handed to the HTTP response while streaming the audio.
_STREAM_CHUNK_BYTES = 64 * 1024

# Synthesis runs on worker threads so the event loop stays responsive; the
# lock keeps generation on the single shared `interface` one-at-a-time, which
# is how it behaved when the call was made inline in the coroutine.
_generation_lock = threading.Lock()


def _synthesize_wav(text):
    """Generate speech for *text* and return it in a rewound in-memory buffer.

    This call is blocking and is meant to run on a worker thread.
    """
    with _generation_lock:
        output = interface.generate(
            config=outetts.GenerationConfig(
                text=text,
                generation_type=outetts.GenerationType.CHUNKED,
                speaker=speaker,
                sampler_config=outetts.SamplerConfig(temperature=0.4),
            )
        )
        audio_buffer = io.BytesIO()
        # NOTE(review): assumes output.save() accepts a file-like object and
        # writes WAV data into it — confirm against the installed outetts
        # version (it may expect a filesystem path).
        output.save(audio_buffer)
    audio_buffer.seek(0)
    return audio_buffer


@app.post("/tts")
async def tts_endpoint(request: Request):
    """
    Accepts JSON {"text": "..."} and streams the generated audio as WAV.

    Responses:
        200: the generated audio, media type "audio/wav".
        400: the body is not valid JSON, is not a JSON object, or its
            "text" field is missing, empty, or not a string.
        500: any other failure; the body is {"error": "<exception message>"}.
    """
    try:
        try:
            data = await request.json()
        except (json.JSONDecodeError, UnicodeDecodeError):
            # A malformed body is the client's fault, not a server error.
            return JSONResponse(
                {"error": "Request body must be valid JSON"}, status_code=400
            )

        # Valid JSON may still be a list/number/string, which has no .get().
        text = data.get("text") if isinstance(data, dict) else None
        if not isinstance(text, str) or not text:
            return JSONResponse({"error": "Missing 'text' in request"}, status_code=400)

        # Generation is blocking; run it off the event loop so other requests
        # are still served while the audio is being produced.
        audio_buffer = await run_in_threadpool(_synthesize_wav, text)

        def audio_stream():
            # Emit the buffer in fixed-size pieces rather than one large chunk.
            while True:
                chunk = audio_buffer.read(_STREAM_CHUNK_BYTES)
                if not chunk:
                    break
                yield chunk

        return StreamingResponse(audio_stream(), media_type="audio/wav")
    except Exception as e:
        # Top-level boundary of the handler: keep the traceback in the logs
        # and report the failure to the client as before.
        _logger.exception("TTS request failed")
        return JSONResponse({"error": str(e)}, status_code=500)
# WebSocket endpoint removed; use POST /tts for TTS requests.