Spaces:

hayas
/

kokoro-ja

Running on Zero

hayas committed 12 days ago

Commit

20c2334

1 Parent(s): ef2657b

Update

Files changed (1) hide show

app.py CHANGED Viewed

@@ -25,9 +25,22 @@ VOICES = [
 @spaces.GPU(duration=20)
 def run(text: str, voice: str, speed: float = 1.0) -> tuple[tuple[int, np.ndarray], str]:
-    """Generate audio from text using Kokoro.
-    Available voices are:
         - jf_alpha
         - jf_gongitsune
         - jf_nezumi
@@ -35,12 +48,12 @@ def run(text: str, voice: str, speed: float = 1.0) -> tuple[tuple[int, np.ndarra
         - jm_kumo
     Args:
-        text (str): Text to generate audio from.
-        voice (str): Voice to use.
-        speed (float): Speed of the audio. Defaults to 1.0.
     Returns:
-        tuple[tuple[int, np.ndarray], str]: Tuple of (sample rate, audio data) and the text.
     """
     generator = pipeline(
         text,

 @spaces.GPU(duration=20)
 def run(text: str, voice: str, speed: float = 1.0) -> tuple[tuple[int, np.ndarray], str]:
+    """Synthesizes speech from Japanese text using the Kokoro TTS model.
+    Note:
+        This model supports only Japanese input texts.
+    Voice Selection:
+        - The `voice` parameter specifies the speaker's characteristics and should follow the naming convention:
+        `<language/accent><gender>_<voice_name>`
+        - `<language/accent>`:
+            - 'j' for Japanese
+        - `<gender>`:
+            - 'f' for female
+            - 'm' for male
+        - Example: 'jf_alpha' indicates a Japanese female voice named Alpha.
+    Available voices:
         - jf_alpha
         - jf_gongitsune
         - jf_nezumi
         - jm_kumo
     Args:
+        text: Input text to be synthesized. Only Japanese text is supported. Non-Japanese input may result in errors or mispronunciations.
+        voice: Identifier for the voice to be used in synthesis. Must be one of the available voices listed above (e.g. "jf_alpha").
+        speed: Playback speed multiplier. A value of 1.0 means normal speed; values above 1.0 speed up the speech and values below 1.0 slow it down. Defaults to 1.0.
     Returns:
+        A tuple containing the audio and the tokens used to generate the audio.
     """
     generator = pipeline(
         text,