Update
Browse files
app.py
CHANGED
@@ -25,9 +25,22 @@ VOICES = [
|
|
25 |
|
26 |
@spaces.GPU(duration=20)
|
27 |
def run(text: str, voice: str, speed: float = 1.0) -> tuple[tuple[int, np.ndarray], str]:
|
28 |
-
"""
|
29 |
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
- jf_alpha
|
32 |
- jf_gongitsune
|
33 |
- jf_nezumi
|
@@ -35,12 +48,12 @@ def run(text: str, voice: str, speed: float = 1.0) -> tuple[tuple[int, np.ndarra
|
|
35 |
- jm_kumo
|
36 |
|
37 |
Args:
|
38 |
-
text
|
39 |
-
voice
|
40 |
-
speed
|
41 |
|
42 |
Returns:
|
43 |
-
tuple
|
44 |
"""
|
45 |
generator = pipeline(
|
46 |
text,
|
|
|
25 |
|
26 |
@spaces.GPU(duration=20)
|
27 |
def run(text: str, voice: str, speed: float = 1.0) -> tuple[tuple[int, np.ndarray], str]:
|
28 |
+
"""Synthesizes speech from Japanese text using the Kokoro TTS model.
|
29 |
|
30 |
+
Note:
|
31 |
+
This model supports only Japanese input texts.
|
32 |
+
|
33 |
+
Voice Selection:
|
34 |
+
- The `voice` parameter specifies the speaker's characteristics and should follow the naming convention:
|
35 |
+
`<language/accent><gender>_<voice_name>`
|
36 |
+
- `<language/accent>`:
|
37 |
+
- 'j' for Japanese
|
38 |
+
- `<gender>`:
|
39 |
+
- 'f' for female
|
40 |
+
- 'm' for male
|
41 |
+
- Example: 'jf_alpha' indicates a Japanese female voice named Alpha.
|
42 |
+
|
43 |
+
Available voices:
|
44 |
- jf_alpha
|
45 |
- jf_gongitsune
|
46 |
- jf_nezumi
|
|
|
48 |
- jm_kumo
|
49 |
|
50 |
Args:
|
51 |
+
text: Input text to be synthesized. Only Japanese text is supported. Non-Japanese input may result in errors or mispronunciations.
|
52 |
+
voice: Identifier for the voice to be used in synthesis, e.g. "jf_alpha". Must be one of the available voices listed above.
|
53 |
+
speed: Playback speed multiplier. A value of 1.0 means normal speed; values above or below adjust the speech rate accordingly. Defaults to 1.0.
|
54 |
|
55 |
Returns:
|
56 |
+
A tuple `(audio, tokens)`: `audio` is itself an `(int, np.ndarray)` pair holding the synthesized audio, and `tokens` is the string of tokens used to generate the audio.
|
57 |
"""
|
58 |
generator = pipeline(
|
59 |
text,
|