dev-mode-python

Paused

File size: 1,762 Bytes

60a351e
ac6dc4e
ad94d02
323d1a0
ac6dc4e
ad94d02
 
 
 
14c5384
 
 
 
 
ad94d02
 
 
 
e93f063
ad94d02
 
 
 
 
 
323d1a0
69f55df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52d950c
 
 
 
 
 
 
69f55df

import io
import json
import tempfile
from pathlib import Path

import outetts
from fastapi import FastAPI, Request, WebSocket, WebSocketDisconnect
from fastapi.responses import StreamingResponse, JSONResponse
# Build the model configuration first, then hand it to the TTS interface.
# The transformers (HF) backend is selected here; the llama.cpp alternative
# would pass backend=outetts.Backend.LLAMACPP together with
# quantization=outetts.LlamaCppQuantization.FP16 instead.
_model_config = outetts.ModelConfig.auto_config(
    model=outetts.Models.VERSION_1_0_SIZE_1B,
    backend=outetts.Backend.HF,
)
interface = outetts.Interface(config=_model_config)

# Speaker profile loaded once at import time and reused by the handlers.
speaker = interface.load_default_speaker("EN-FEMALE-1-NEUTRAL")

# Application object that the route decorators in this module attach to.
app = FastAPI()

@app.get("/")
def greet_json():
    """Return a fixed greeting mapping for GET requests to the root path."""
    greeting = {"Hello": "World!"}
    return greeting

def _synthesize_wav_bytes(text: str) -> bytes:
    """Generate speech for *text* and return the bytes of the saved audio file.

    The audio is written to a file named ``temp.wav`` inside a fresh temporary
    directory and read back as raw bytes.

    Args:
        text: The text to synthesize with the module-level ``interface`` and
            ``speaker``.

    Returns:
        The full contents of the audio file produced by ``output.save``.
    """
    output = interface.generate(
        config=outetts.GenerationConfig(
            text=text,
            generation_type=outetts.GenerationType.CHUNKED,
            speaker=speaker,
            sampler_config=outetts.SamplerConfig(temperature=0.4),
        )
    )
    # A private per-call directory gives every request its own file path, so
    # concurrent connections or worker processes cannot overwrite or delete
    # each other's audio, and the context manager removes the file even when
    # saving or reading raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        wav_path = Path(tmp_dir) / "temp.wav"
        output.save(str(wav_path))
        return wav_path.read_bytes()


@app.websocket("/ws/tts")
async def websocket_tts(websocket: WebSocket):
    """Serve text-to-speech over a WebSocket until the client disconnects.

    For every text message received, the text is synthesized and the
    resulting audio file is sent back as a single binary message.

    Args:
        websocket: The incoming WebSocket connection; accepted on entry.
    """
    await websocket.accept()
    try:
        while True:
            # Receive the next text chunk from the client.
            text = await websocket.receive_text()
            # NOTE(review): synthesis runs synchronously inside this coroutine,
            # so the event loop is blocked while audio is generated. Moving it
            # to a worker thread would need confirmation that the model can be
            # called concurrently.
            audio_bytes = _synthesize_wav_bytes(text)
            # Send the audio data back to the client as bytes.
            await websocket.send_bytes(audio_bytes)
    except WebSocketDisconnect:
        # The client closed the connection; there is nothing left to clean up.
        pass