# Spaces: Running  (web-page status banner captured together with the source; not part of the module)
import base64
import io
from typing import Literal, TypeAlias

import aiohttp
from pydantic import BaseModel
SpeakerId = int | |
class SpeakerStyle(BaseModel):
    """One entry of a speaker's ``styles`` list."""

    # Field names mirror the JSON keys being validated, which is why `id` and
    # `type` shadow the builtins here.
    name: str
    id: SpeakerId
    # Validation only accepts the literal string "talk" for this field.
    type: Literal["talk"]
class Speaker(BaseModel):
    """A single speaker record, as validated from the ``/speakers`` response."""

    name: str
    speaker_uuid: str
    # Every element is validated as a SpeakerStyle.
    styles: list[SpeakerStyle]
    version: str
class AudioQuery(BaseModel):
    """Synthesis parameters exchanged with the engine.

    Instances are validated from the ``/audio_query`` response and dumped back
    to JSON as the request body of ``/synthesis``.

    Nullable fields default to ``None`` so that a response omitting them still
    validates; a nullable annotation without a default is *required* in
    pydantic.
    """

    accent_phrases: list[dict]
    speedScale: float
    intonationScale: float
    tempoDynamicsScale: float | None = None
    pitchScale: float
    volumeScale: float
    prePhonemeLength: float
    postPhonemeLength: float
    # Optional like tempoDynamicsScale: absent and null are treated the same.
    pauseLength: float | None = None
    # 1.0 is the neutral multiplier used when the response carries no value.
    # NOTE(review): presumably matches the engine's own default - confirm
    # against the engine version in use.
    pauseLengthScale: float = 1.0
    outputSamplingRate: int
    outputStereo: bool
    kana: str
class Audio(BaseModel):
    """Container for the raw bytes of one audio response body."""

    # Unmodified bytes as read from the HTTP response.
    wav: bytes
class VoiceVoxClient:
    """Asynchronous HTTP client for a VOICEVOX-style speech engine.

    Each method opens its own short-lived ``aiohttp.ClientSession``, performs
    a single request against ``endpoint`` and raises ``Exception`` when the
    response status is anything other than 200.
    """

    endpoint: str

    def __init__(self, endpoint: str = "http://127.0.0.1:50021"):
        """Remember the engine base URL.

        Args:
            endpoint: Base URL of the engine. Trailing slashes are removed so
                that appending ``"/path"`` never yields ``"//path"``.
        """
        self.endpoint = endpoint.rstrip("/")

    @staticmethod
    def _ensure_ok(status: int, action: str) -> None:
        """Raise ``Exception("Failed to <action>: <status>")`` unless status is 200."""
        if status != 200:
            raise Exception(f"Failed to {action}: {status}")

    async def get_speakers(self) -> list[Speaker]:
        """Fetch ``/speakers`` and validate every element as a ``Speaker``.

        Raises:
            Exception: If the response status is not 200.
        """
        async with aiohttp.ClientSession() as session:
            async with session.get(f"{self.endpoint}/speakers") as response:
                self._ensure_ok(response.status, "get speakers")
                payload = await response.json()
                return [Speaker.model_validate(entry) for entry in payload]

    async def get_core_versions(self) -> list[str]:
        """Fetch ``/core_versions`` and return the decoded JSON body as-is.

        Raises:
            Exception: If the response status is not 200.
        """
        async with aiohttp.ClientSession() as session:
            async with session.get(f"{self.endpoint}/core_versions") as response:
                self._ensure_ok(response.status, "get core version")
                return await response.json()

    async def post_audio_query(
        self,
        text: str,
        speaker: SpeakerId,
        core_version: str | None = None,
    ) -> AudioQuery:
        """POST to ``/audio_query`` and validate the reply as an ``AudioQuery``.

        Args:
            text: Sent as the ``text`` query parameter.
            speaker: Sent as the ``speaker`` query parameter.
            core_version: Sent as ``core_version`` only when truthy.

        Raises:
            Exception: If the response status is not 200.
        """
        params: dict[str, str | int | float] = {"text": text, "speaker": speaker}
        if core_version:
            params["core_version"] = core_version

        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{self.endpoint}/audio_query",
                params=params,
            ) as res:
                self._ensure_ok(res.status, "post audio query")
                return AudioQuery.model_validate(await res.json())

    async def post_synthesis(
        self,
        speaker: SpeakerId,
        audio_query: AudioQuery,
        enable_interrogative_upspeak: bool = True,
        core_version: str | None = None,
    ) -> Audio:
        """POST an audio query to ``/synthesis`` and return the response bytes.

        Args:
            speaker: Sent as the ``speaker`` query parameter.
            audio_query: Dumped with ``model_dump()`` and sent as the JSON body.
            enable_interrogative_upspeak: Sent as the string ``"true"`` or
                ``"false"`` in the query string.
            core_version: Sent as ``core_version`` only when truthy.

        Raises:
            Exception: If the response status is not 200.
        """
        params: dict[str, str | int | float] = {
            "speaker": speaker,
            # Query-string values must be strings, so the flag is spelled out.
            "enable_interrogative_upspeak": (
                "true" if enable_interrogative_upspeak else "false"
            ),
        }
        if core_version:
            params["core_version"] = core_version

        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{self.endpoint}/synthesis",
                params=params,
                json=audio_query.model_dump(),
            ) as response:
                self._ensure_ok(response.status, "post synthesis")
                # read() already returns bytes; no intermediate buffer needed.
                return Audio(wav=await response.read())

    async def post_connect_waves(
        self,
        audio_list: list[Audio],
    ) -> Audio:
        """POST several clips to ``/connect_waves`` and return the response bytes.

        Args:
            audio_list: Clips whose ``wav`` bytes are base64-encoded and sent
                as a JSON array of strings, in the given order.

        Raises:
            Exception: If the response status is not 200.
        """
        encoded = [
            base64.b64encode(audio.wav).decode("utf-8") for audio in audio_list
        ]
        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{self.endpoint}/connect_waves",
                json=encoded,
            ) as response:
                self._ensure_ok(response.status, "connect waves")
                return Audio(wav=await response.read())