Spaces:
Paused
Paused
File size: 1,762 Bytes
60a351e ac6dc4e ad94d02 323d1a0 ac6dc4e ad94d02 14c5384 ad94d02 e93f063 ad94d02 323d1a0 69f55df 52d950c 69f55df |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import asyncio
import io
import json
import os
import tempfile
import threading

import outetts
from fastapi import FastAPI, Request, WebSocket, WebSocketDisconnect
from fastapi.responses import StreamingResponse, JSONResponse
# Text-to-speech engine setup: the 1B v1.0 OuteTTS model on the transformers
# (HF) backend. To use the llama.cpp backend instead, pass
# backend=outetts.Backend.LLAMACPP together with
# quantization=outetts.LlamaCppQuantization.FP16.
_model_config = outetts.ModelConfig.auto_config(
    model=outetts.Models.VERSION_1_0_SIZE_1B,
    backend=outetts.Backend.HF,
)
interface = outetts.Interface(config=_model_config)

# Default speaker profile handed to the generator by the TTS endpoint.
speaker = interface.load_default_speaker("EN-FEMALE-1-NEUTRAL")

# Application object that the route decorators below register on.
app = FastAPI()
@app.get("/")
def greet_json():
    """Serve a fixed JSON greeting on the root path."""
    # No return annotation on purpose: FastAPI would treat it as a response
    # model and start validating/serializing the payload against it.
    greeting = {"Hello": "World!"}
    return greeting
# Serializes access to the shared model: synthesis now runs in worker threads,
# and this keeps generation one-at-a-time exactly as it was when the handler
# called interface.generate() directly on the event loop.
_tts_lock = threading.Lock()


def _synthesize_wav(text):
    """Generate speech for *text* and return the saved .wav file's bytes.

    Blocking call; meant to be run in a worker thread, not on the event loop.

    Args:
        text: The text chunk to convert to speech.

    Returns:
        The raw contents of the audio file written by ``output.save``.
    """
    with _tts_lock:
        output = interface.generate(
            config=outetts.GenerationConfig(
                text=text,
                generation_type=outetts.GenerationType.CHUNKED,
                speaker=speaker,
                sampler_config=outetts.SamplerConfig(temperature=0.4),
            )
        )

    # The audio is written to disk and read back. A private temporary
    # directory gives every call its own path (no clobbering between
    # processes sharing a working directory) and is removed even if saving
    # or reading raises. The ".wav" file name is kept from the original.
    with tempfile.TemporaryDirectory() as tmp_dir:
        wav_path = os.path.join(tmp_dir, "temp.wav")
        output.save(wav_path)
        with open(wav_path, "rb") as wav_file:
            return wav_file.read()


@app.websocket("/ws/tts")
async def websocket_tts(websocket: WebSocket):
    """Stream text-to-speech over a WebSocket.

    Protocol: the client sends text messages; for each one the server replies
    with a single binary message containing the generated audio file. The
    loop runs until the client disconnects.

    Args:
        websocket: The incoming WebSocket connection.
    """
    await websocket.accept()
    loop = asyncio.get_running_loop()
    try:
        while True:
            # Receive the next text chunk from the client.
            data = await websocket.receive_text()
            # Run the blocking synthesis off the event loop so other
            # connections and requests stay responsive during inference.
            audio_bytes = await loop.run_in_executor(None, _synthesize_wav, data)
            # Send the audio back as raw bytes.
            await websocket.send_bytes(audio_bytes)
    except WebSocketDisconnect:
        # The client closed the connection; nothing left to clean up.
        pass
|