"""FastAPI service exposing OuteTTS text-to-speech over a WebSocket."""

import io
import json
import os
import tempfile
import threading

from fastapi import FastAPI, Request, WebSocket, WebSocketDisconnect
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import StreamingResponse, JSONResponse
import outetts

# Initialize the interface
interface = outetts.Interface(
    config=outetts.ModelConfig.auto_config(
        model=outetts.Models.VERSION_1_0_SIZE_1B,
        # For llama.cpp backend
        # backend=outetts.Backend.LLAMACPP,
        # quantization=outetts.LlamaCppQuantization.FP16
        # For transformers backend
        backend=outetts.Backend.HF,
    )
)

# Load the default speaker profile
speaker = interface.load_default_speaker("EN-FEMALE-1-NEUTRAL")

# Generation runs in worker threads (see websocket_tts). This lock keeps the
# shared model handling one request at a time, as it did when generation ran
# directly on the event loop.
_generation_lock = threading.Lock()

app = FastAPI()


@app.get("/")
def greet_json():
    """Return a static greeting payload."""
    return {"Hello": "World!"}


def _synthesize_wav_bytes(text: str) -> bytes:
    """Generate speech for *text* and return the saved audio file's bytes.

    The audio is written to a file inside a private temporary directory so
    that concurrent connections never share a path, and the directory is
    removed even when saving or reading fails.
    """
    with _generation_lock:
        output = interface.generate(
            config=outetts.GenerationConfig(
                text=text,
                generation_type=outetts.GenerationType.CHUNKED,
                speaker=speaker,
                sampler_config=outetts.SamplerConfig(
                    temperature=0.4
                ),
            )
        )

    # The output object is saved to a path and read back, so go through a
    # file that only this call can see.
    with tempfile.TemporaryDirectory() as tmp_dir:
        temp_path = os.path.join(tmp_dir, "temp.wav")
        output.save(temp_path)
        with open(temp_path, "rb") as f:
            return f.read()


@app.websocket("/ws/tts")
async def websocket_tts(websocket: WebSocket):
    """Serve text-to-speech over a WebSocket.

    For every text message received from the client, generate audio and send
    the resulting file contents back as a single binary message. The loop
    ends quietly when the client disconnects.
    """
    await websocket.accept()
    try:
        while True:
            # Receive a text chunk from the client
            data = await websocket.receive_text()

            # Generation and file I/O are blocking; run them in a worker
            # thread so other connections are not stalled meanwhile.
            audio_bytes = await run_in_threadpool(_synthesize_wav_bytes, data)

            # Send the audio data back as bytes
            await websocket.send_bytes(audio_bytes)
    except WebSocketDisconnect:
        # Client closed the connection; nothing left to do.
        pass