Spaces:

p1atdev
/

PodcastVox

Running

PodcastVox / src /voicevox.py

Plat

init

3a09141 6 days ago

4.1 kB

	import aiohttp
	from typing import Literal
	from pydantic import BaseModel
	import io
	import base64

	# Integer identifier sent as the ``speaker`` request parameter; it is also
	# the type of ``SpeakerStyle.id``.
	SpeakerId = int


	class SpeakerStyle(BaseModel):
	    """One entry of a speaker's ``styles`` list.

	    Field names match the keys of the validated payload, which is why
	    ``id`` and ``type`` are used despite shadowing builtins inside the class.
	    """

	    # Display name of the style.
	    name: str
	    # Identifier of this style; same type as the client's ``speaker`` arguments.
	    id: SpeakerId
	    # Only the literal value "talk" passes validation.
	    type: Literal["talk"]


	class Speaker(BaseModel):
	    """A single speaker record, validated from an item of the ``/speakers`` response."""

	    # Display name of the speaker.
	    name: str
	    # UUID string identifying the speaker.
	    speaker_uuid: str
	    # Styles available for this speaker.
	    styles: list[SpeakerStyle]
	    # Version string reported for the speaker.
	    version: str


	class AudioQuery(BaseModel):
	    """Synthesis parameters returned by ``/audio_query`` and posted to ``/synthesis``.

	    Field names are camelCase because they must match the JSON keys exactly:
	    the model is validated from, and dumped back to, the engine payload
	    without aliases.

	    The pause-related fields and ``tempoDynamicsScale`` are optional so that
	    responses which omit them still validate.
	    """

	    # Accent phrases are kept as untyped dicts and passed through unchanged.
	    accent_phrases: list[dict]
	    speedScale: float
	    intonationScale: float
	    # Optional: absent from some responses.
	    tempoDynamicsScale: float | None = None
	    pitchScale: float
	    volumeScale: float
	    prePhonemeLength: float
	    postPhonemeLength: float
	    # Nullable *and* optional; without the default pydantic v2 would treat
	    # the field as required even though ``None`` is an accepted value.
	    pauseLength: float | None = None
	    # NOTE(review): 1.0 mirrors the upstream engine schema default (scale of
	    # 1 leaves pauses unchanged) - confirm against the engine version in use.
	    pauseLengthScale: float = 1.0
	    outputSamplingRate: int
	    outputStereo: bool
	    kana: str


	class Audio(BaseModel):
	    """Audio payload wrapper; the client fills it with raw HTTP response bytes."""

	    # Raw audio bytes (the client base64-encodes these for ``/connect_waves``).
	    wav: bytes


	class VoiceVoxError(Exception):
	    """Raised when the engine answers a request with a non-200 status."""


	class VoiceVoxClient:
	    """Async HTTP client for the engine API rooted at ``endpoint``.

	    Each method opens its own ``aiohttp.ClientSession`` for the duration of
	    the call, so the client holds no connection state between calls.
	    """

	    endpoint: str

	    def __init__(self, endpoint: str = "http://127.0.0.1:50021"):
	        """Store the base URL of the engine.

	        Args:
	            endpoint: Base URL, with or without a trailing slash.
	        """
	        self.endpoint = endpoint

	    def _url(self, path: str) -> str:
	        """Return the absolute URL for *path* (given without a leading slash)."""
	        # Strip at use time so ``self.endpoint`` keeps the caller's exact value
	        # while "http://host/" no longer yields "http://host//path".
	        return f"{self.endpoint.rstrip('/')}/{path}"

	    @staticmethod
	    def _ensure_ok(response, action: str) -> None:
	        """Raise ``VoiceVoxError`` unless *response* has status 200.

	        Args:
	            response: The aiohttp response to check.
	            action: Verb phrase inserted into the message ("Failed to <action>").
	        """
	        if response.status != 200:
	            raise VoiceVoxError(f"Failed to {action}: {response.status}")

	    async def get_speakers(self) -> list[Speaker]:
	        """Fetch ``/speakers`` and validate every item as a ``Speaker``.

	        Raises:
	            VoiceVoxError: If the response status is not 200.
	        """
	        async with (
	            aiohttp.ClientSession() as session,
	            session.get(self._url("speakers")) as response,
	        ):
	            self._ensure_ok(response, "get speakers")
	            payload = await response.json()
	        return [Speaker.model_validate(item) for item in payload]

	    async def get_core_versions(self) -> list[str]:
	        """Fetch ``/core_versions`` and return the decoded JSON body as-is.

	        Raises:
	            VoiceVoxError: If the response status is not 200.
	        """
	        async with (
	            aiohttp.ClientSession() as session,
	            session.get(self._url("core_versions")) as response,
	        ):
	            self._ensure_ok(response, "get core version")
	            return await response.json()

	    async def post_audio_query(
	        self,
	        text: str,
	        speaker: SpeakerId,
	        core_version: str | None = None,
	    ) -> AudioQuery:
	        """POST ``/audio_query`` and return the validated ``AudioQuery``.

	        Args:
	            text: Text sent as the ``text`` query parameter.
	            speaker: Id sent as the ``speaker`` query parameter.
	            core_version: Sent as ``core_version`` only when truthy.

	        Raises:
	            VoiceVoxError: If the response status is not 200.
	        """
	        params: dict[str, str | int | float] = {"text": text, "speaker": speaker}
	        if core_version:
	            params["core_version"] = core_version
	        async with (
	            aiohttp.ClientSession() as session,
	            session.post(self._url("audio_query"), params=params) as response,
	        ):
	            self._ensure_ok(response, "post audio query")
	            payload = await response.json()
	        return AudioQuery.model_validate(payload)

	    async def post_synthesis(
	        self,
	        speaker: SpeakerId,
	        audio_query: AudioQuery,
	        enable_interrogative_upspeak: bool = True,
	        core_version: str | None = None,
	    ) -> Audio:
	        """POST ``/synthesis`` with *audio_query* as JSON and return the audio.

	        Args:
	            speaker: Id sent as the ``speaker`` query parameter.
	            audio_query: Dumped with ``model_dump()`` and sent as the JSON body.
	            enable_interrogative_upspeak: Sent as the string "true" or "false".
	            core_version: Sent as ``core_version`` only when truthy.

	        Returns:
	            An ``Audio`` wrapping the raw response body.

	        Raises:
	            VoiceVoxError: If the response status is not 200.
	        """
	        params: dict[str, str | int | float] = {
	            "speaker": speaker,
	            # Sent as text rather than a Python bool; aiohttp query
	            # parameters accept only str/int/float values.
	            "enable_interrogative_upspeak": (
	                "true" if enable_interrogative_upspeak else "false"
	            ),
	        }
	        if core_version:
	            params["core_version"] = core_version
	        async with (
	            aiohttp.ClientSession() as session,
	            session.post(
	                self._url("synthesis"),
	                params=params,
	                json=audio_query.model_dump(),
	            ) as response,
	        ):
	            self._ensure_ok(response, "post synthesis")
	            # ``read()`` already returns bytes; no intermediate buffer needed.
	            return Audio(wav=await response.read())

	    async def post_connect_waves(
	        self,
	        audio_list: list[Audio],
	    ) -> Audio:
	        """POST ``/connect_waves`` with the given clips and return the result.

	        Args:
	            audio_list: Clips whose ``wav`` bytes are base64-encoded and sent
	                as a JSON array of strings, in the given order.

	        Returns:
	            An ``Audio`` wrapping the raw response body.

	        Raises:
	            VoiceVoxError: If the response status is not 200.
	        """
	        encoded = [
	            base64.b64encode(audio.wav).decode("utf-8") for audio in audio_list
	        ]
	        async with (
	            aiohttp.ClientSession() as session,
	            session.post(self._url("connect_waves"), json=encoded) as response,
	        ):
	            self._ensure_ok(response, "connect waves")
	            return Audio(wav=await response.read())