kokoro-ja / app.py
hayas's picture
MCP
a2f3696
raw
history blame
2.62 kB
#!/usr/bin/env python
import os
import shlex
import subprocess
# When SPACE_ID is set (presumably when running on Hugging Face Spaces — confirm),
# run `python -m unidic download` first. This has to stay above the imports that
# follow it in the file so the download finishes before they execute.
if os.environ.get("SPACE_ID"):
    subprocess.run(  # noqa: S603
        shlex.split("python -m unidic download"),
        check=True,  # abort start-up if the download command fails
        timeout=100,  # seconds
    )
import gradio as gr
import numpy as np
import spaces
from kokoro import KPipeline
# Voice identifiers offered in the UI dropdown; the first entry is the default
# selection there.
VOICES = [
    "jf_alpha",
    "jf_gongitsune",
    "jf_nezumi",
    "jf_tebukuro",
    "jm_kumo",
]

# One shared Kokoro pipeline, built once at import time with lang_code="j" and
# reused by every call to `run`.
pipeline = KPipeline(lang_code="j")
@spaces.GPU(duration=20)
def run(text: str, voice: str, speed: float = 1.0) -> tuple[tuple[int, np.ndarray], str]:
    """Generate audio from text using Kokoro.

    Available voices are:
    - jf_alpha
    - jf_gongitsune
    - jf_nezumi
    - jf_tebukuro
    - jm_kumo

    The text is split on runs of newlines. Every resulting segment is synthesized
    and the audio of all segments is concatenated in order.

    Args:
        text (str): Text to generate audio from.
        voice (str): Voice to use.
        speed (float): Speed of the audio. Defaults to 1.0.

    Returns:
        tuple[tuple[int, np.ndarray], str]: Tuple of (sample rate, audio data) and the phoneme/token string produced by the pipeline (one line per segment).

    Raises:
        gr.Error: If the pipeline produced no audio, e.g. for empty input.
    """
    generator = pipeline(
        text,
        voice=voice,
        speed=speed,
        split_pattern=r"\n+",
    )

    audio_segments: list[np.ndarray] = []
    token_segments: list[str] = []
    # Drain the generator instead of taking only its first item: the pipeline
    # yields one result per segment, and stopping after the first one silently
    # dropped the rest of a multi-line input.
    for _, ps, audio in generator:
        if audio is None:
            # Defensive: skip a segment for which no audio was synthesized.
            continue
        # Assumes `audio` is a CPU tensor exposing `.numpy()` (as the original
        # code did) holding 1-D samples, so axis-0 concatenation joins segments
        # in time — TODO confirm.
        audio_segments.append(audio.numpy())
        token_segments.append(ps)

    if not audio_segments:
        # Previously an exhausted generator surfaced as a bare StopIteration.
        raise gr.Error("No audio was generated. Please enter some text.")

    # 24000 is the sample rate reported alongside the waveform.
    return (24000, np.concatenate(audio_segments)), "\n".join(token_segments)
# Build the Gradio UI.
# NOTE(review): the nesting below was reconstructed from a flattened copy of the
# file — confirm the placement of gr.Examples and the click handler against the
# original layout.
with gr.Blocks(css_paths="style.css") as demo:
    gr.Markdown("# Kokoro (ja)")
    with gr.Row():
        # First column: synthesis inputs and the trigger button.
        with gr.Column():
            input_text = gr.Textbox(label="Input Text")
            voice = gr.Dropdown(label="Voice", choices=VOICES, value=VOICES[0])
            speed = gr.Slider(label="Speed", minimum=0.5, maximum=2.0, step=0.1, value=1.0)
            run_button = gr.Button()
        # Second column: the two values returned by `run`.
        with gr.Column():
            output_audio = gr.Audio(label="Output Audio", autoplay=True)
            output_tokens = gr.Textbox(label="Output Tokens")
    # Sample sentences that fill the input textbox.
    gr.Examples(
        examples=[
            "どうもこんにちは。今日はいい天気ですね。",
            "隣の竹垣に竹立てかけたのは隣の竹垣に竹立てかけたかったからと骨粗鬆症の東京特許許可局局長がマサチューセッツ州の美術室で魔術師が呪術師と手術中に供述。",
            "李も桃も桃のうち",
            "隣の客はよく柿食う客だ",
            "裏庭には二羽、庭には二羽、鶏がいる。",
            "貴社の記者が汽車で帰社した。橋の端にある箸。石でできた医師。お食事券にまつわる汚職事件。",
        ],
        inputs=input_text,
    )

    # Wire the button to `run`: (text, voice, speed) -> (audio, tokens).
    run_button.click(
        fn=run,
        inputs=[input_text, voice, speed],
        outputs=[output_audio, output_tokens],
    )

if __name__ == "__main__":
    # Start the app with Gradio's MCP server option enabled.
    demo.launch(mcp_server=True)