File size: 4,099 Bytes
3a09141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import aiohttp
from typing import Literal
from pydantic import BaseModel
import io
import base64

# Alias for the integer identifier that the client methods send as the
# ``speaker`` query parameter and that SpeakerStyle exposes as ``id``.
SpeakerId = int


class SpeakerStyle(BaseModel):
    """One entry of a speaker's ``styles`` list."""

    name: str
    # Typed with the same alias the client methods use for their ``speaker``
    # argument.
    id: SpeakerId
    # Validation only accepts the literal string "talk"; any other value is
    # rejected by the model.
    type: Literal["talk"]


class Speaker(BaseModel):
    """Speaker record validated from each item of the ``/speakers`` response."""

    name: str
    speaker_uuid: str
    # Every style must itself validate as a SpeakerStyle.
    styles: list[SpeakerStyle]
    version: str


class AudioQuery(BaseModel):
    """Synthesis query exchanged with the engine.

    Instances are validated from the JSON body of ``/audio_query`` and dumped
    back as the JSON body of ``/synthesis``. Field names keep the exact
    spelling of the JSON keys, so they must not be renamed to snake_case.
    """

    # Left as untyped dicts; the phrase structure is not modelled here.
    accent_phrases: list[dict]
    speedScale: float
    intonationScale: float
    # Nullable and optional: payloads that omit the key still validate.
    tempoDynamicsScale: float | None = None
    pitchScale: float
    volumeScale: float
    prePhonemeLength: float
    postPhonemeLength: float
    # Nullable fields without a default are still *required* by pydantic, so a
    # payload lacking this key used to fail validation. Defaulting to None
    # makes it behave like tempoDynamicsScale above.
    pauseLength: float | None = None
    # NOTE(review): 1.0 is assumed to be the neutral value the engine itself
    # uses when the key is absent -- confirm against the targeted engine.
    pauseLengthScale: float = 1.0
    outputSamplingRate: int
    outputStereo: bool
    kana: str


class Audio(BaseModel):
    """Container for audio bytes read from an engine response."""

    # Raw response body of ``/synthesis`` or ``/connect_waves``; the client
    # base64-encodes this value before posting it to ``/connect_waves``.
    wav: bytes


class VoiceVoxClient:
    """Small async HTTP client for a VOICEVOX-compatible engine.

    Each public method opens a short-lived ``aiohttp.ClientSession``, performs
    a single request and raises ``Exception`` when the response status is not
    200.
    """

    endpoint: str

    def __init__(self, endpoint: str = "http://127.0.0.1:50021"):
        """Store the engine base URL.

        Args:
            endpoint: Base URL of the engine. It is stored unchanged; a
                trailing slash is tolerated when request URLs are built.
        """
        self.endpoint = endpoint

    def _url(self, path: str) -> str:
        """Join the base endpoint and ``path`` with exactly one slash."""
        return f"{self.endpoint.rstrip('/')}/{path.lstrip('/')}"

    async def _request(
        self,
        method: str,
        path: str,
        failure: str,
        *,
        binary: bool = False,
        params: dict[str, str | int | float] | None = None,
        json_body: object = None,
    ):
        """Send one request and return its decoded body.

        Args:
            method: HTTP method name, e.g. ``"GET"`` or ``"POST"``.
            path: Path below the endpoint, e.g. ``"/speakers"``.
            failure: Message prefix used when the status is not 200.
            binary: Return the raw bytes instead of the parsed JSON body.
            params: Optional query-string parameters.
            json_body: Optional object sent as the JSON request body.

        Returns:
            ``bytes`` when ``binary`` is true, otherwise the parsed JSON value.

        Raises:
            Exception: If the response status is not 200.
        """
        async with aiohttp.ClientSession() as session:
            async with session.request(
                method,
                self._url(path),
                params=params,
                json=json_body,
            ) as response:
                if response.status != 200:
                    raise Exception(f"{failure}: {response.status}")
                # The body must be consumed before the response is released.
                if binary:
                    return await response.read()
                return await response.json()

    async def get_speakers(self) -> list[Speaker]:
        """Fetch ``/speakers`` and validate every item as a ``Speaker``.

        Raises:
            Exception: If the response status is not 200.
        """
        payload = await self._request("GET", "/speakers", "Failed to get speakers")
        return [Speaker.model_validate(entry) for entry in payload]

    async def get_core_versions(self) -> list[str]:
        """Fetch ``/core_versions`` and return the parsed JSON body as-is.

        Raises:
            Exception: If the response status is not 200.
        """
        return await self._request(
            "GET", "/core_versions", "Failed to get core version"
        )

    async def post_audio_query(
        self,
        text: str,
        speaker: SpeakerId,
        core_version: str | None = None,
    ) -> AudioQuery:
        """Post to ``/audio_query`` and validate the reply as an ``AudioQuery``.

        Args:
            text: Sent as the ``text`` query parameter.
            speaker: Sent as the ``speaker`` query parameter.
            core_version: Sent as ``core_version`` only when truthy.

        Raises:
            Exception: If the response status is not 200.
        """
        params: dict[str, str | int | float] = {"text": text, "speaker": speaker}
        if core_version:
            params["core_version"] = core_version
        payload = await self._request(
            "POST",
            "/audio_query",
            "Failed to post audio query",
            params=params,
        )
        return AudioQuery.model_validate(payload)

    async def post_synthesis(
        self,
        speaker: SpeakerId,
        audio_query: AudioQuery,
        enable_interrogative_upspeak: bool = True,
        core_version: str | None = None,
    ) -> Audio:
        """Post an audio query to ``/synthesis`` and return the response bytes.

        Args:
            speaker: Sent as the ``speaker`` query parameter.
            audio_query: Dumped with ``model_dump()`` and sent as the JSON body.
            enable_interrogative_upspeak: Sent as the string ``"true"`` or
                ``"false"`` in the query string.
            core_version: Sent as ``core_version`` only when truthy.

        Raises:
            Exception: If the response status is not 200.
        """
        params: dict[str, str | int | float] = {
            "speaker": speaker,
            # Spelled out as lowercase text rather than passed as a bool.
            "enable_interrogative_upspeak": (
                "true" if enable_interrogative_upspeak else "false"
            ),
        }
        if core_version:
            params["core_version"] = core_version
        wav_bytes = await self._request(
            "POST",
            "/synthesis",
            "Failed to post synthesis",
            binary=True,
            params=params,
            json_body=audio_query.model_dump(),
        )
        return Audio(wav=wav_bytes)

    async def post_connect_waves(
        self,
        audio_list: list[Audio],
    ) -> Audio:
        """Post several clips to ``/connect_waves`` and return the response bytes.

        Args:
            audio_list: Clips whose ``wav`` bytes are base64-encoded and sent
                as a JSON array of strings, in the given order.

        Raises:
            Exception: If the response status is not 200.
        """
        encoded_clips = [
            base64.b64encode(audio.wav).decode("utf-8") for audio in audio_list
        ]
        wav_bytes = await self._request(
            "POST",
            "/connect_waves",
            "Failed to connect waves",
            binary=True,
            json_body=encoded_clips,
        )
        return Audio(wav=wav_bytes)