dev-mode-python / app.py
Tomtom84's picture
up10
ac6dc4e
raw
history blame
1.79 kB
from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse, JSONResponse
import outetts
import io
import json
# Build the OuteTTS model configuration. The Hugging Face transformers
# backend is selected here; the llama.cpp alternative would instead pass
# backend=outetts.Backend.LLAMACPP together with
# quantization=outetts.LlamaCppQuantization.FP16.
_model_config = outetts.ModelConfig.auto_config(
    model=outetts.Models.VERSION_1_0_SIZE_1B,
    backend=outetts.Backend.HF,
)

# Module-level TTS interface, created once when this module is imported.
interface = outetts.Interface(config=_model_config)

# Default speaker profile, loaded once alongside the interface.
speaker = interface.load_default_speaker("EN-FEMALE-1-NEUTRAL")

# ASGI application object on which the routes below are registered.
app = FastAPI()
@app.get("/")
def greet_json() -> dict:
    """Return a fixed greeting payload for the root path."""
    greeting = {"Hello": "World!"}
    return greeting
@app.post("/tts")
async def tts_endpoint(request: Request):
    """
    Accepts JSON {"text": "..."} and streams the generated audio as WAV.

    Args:
        request: Incoming HTTP request whose body is expected to be a JSON
            object containing a non-empty string under the key "text".

    Returns:
        A StreamingResponse with media type "audio/wav" on success.
        A JSONResponse with status 400 when the body is not valid JSON, is
        not a JSON object, or does not carry a non-empty string "text".
        A JSONResponse with status 500 (carrying the exception message) when
        audio generation or serialization fails.
    """
    # --- Input validation: client mistakes are reported as 400, not 500. ---
    try:
        data = await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both subclass ValueError,
        # so this covers syntactically invalid and undecodable bodies.
        return JSONResponse({"error": "Request body must be valid JSON"}, status_code=400)

    # A valid JSON document may still be a list, string, number, ...; only an
    # object supports the key lookup below.
    if not isinstance(data, dict):
        return JSONResponse({"error": "Request body must be a JSON object"}, status_code=400)

    text = data.get("text")
    if not text:
        return JSONResponse({"error": "Missing 'text' in request"}, status_code=400)
    if not isinstance(text, str):
        return JSONResponse({"error": "'text' must be a string"}, status_code=400)

    # --- Synthesis: anything failing here is a server-side error. ---
    try:
        # NOTE(review): generate() is invoked without await inside an async
        # handler; if it is long-running it will occupy the event loop for the
        # duration. Consider offloading to a worker thread once the model's
        # thread-safety has been confirmed.
        output = interface.generate(
            config=outetts.GenerationConfig(
                text=text,
                generation_type=outetts.GenerationType.CHUNKED,
                speaker=speaker,
                sampler_config=outetts.SamplerConfig(
                    temperature=0.4
                ),
            )
        )
        audio_buffer = io.BytesIO()
        # NOTE(review): assumes output.save() accepts a file-like object and
        # not only a filesystem path -- confirm against the outetts API.
        output.save(audio_buffer)
    except Exception as e:
        return JSONResponse({"error": str(e)}, status_code=500)

    # Snapshot the full buffer contents; equivalent to seek(0) + read().
    wav_bytes = audio_buffer.getvalue()

    def audio_stream():
        # The audio is fully generated before responding, so it is emitted as
        # a single chunk.
        yield wav_bytes

    return StreamingResponse(audio_stream(), media_type="audio/wav")
# WebSocket endpoint removed; use POST /tts for TTS requests.