BryanBradfo committed on
Commit
c5dd1ce
·
1 Parent(s): 2f67766

voicebloom

Browse files
Files changed (1) hide show
  1. app.py +150 -103
app.py CHANGED
@@ -1,57 +1,94 @@
1
- import spaces
2
- from kokoro import KModel, KPipeline
3
- import gradio as gr
4
  import os
5
  import random
6
  import torch
 
 
7
 
8
- CUDA_AVAILABLE = False # Set to False to disable GPU usage
9
- models = {gpu: KModel().to('cuda' if gpu else 'cpu').eval() for gpu in [False]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  pipelines = {lang_code: KPipeline(lang_code=lang_code, model=False) for lang_code in 'ab'}
11
  pipelines['a'].g2p.lexicon.golds['kokoro'] = 'kˈOkəɹO'
12
  pipelines['b'].g2p.lexicon.golds['kokoro'] = 'kˈQkəɹQ'
13
 
14
- def generate_first(text, voice='af_heart', speed=1):
15
- pipeline = pipelines[voice[0]]
16
- pack = pipeline.load_voice(voice)
17
- for _, ps, _ in pipeline(text, voice, speed):
18
- ref_s = pack[len(ps)-1]
19
- audio = models[False](ps, ref_s, speed)
20
- return (24000, audio.numpy()), ps
21
- return None, ''
22
-
23
- def tokenize_first(text, voice='af_heart'):
24
- pipeline = pipelines[voice[0]]
25
- for _, ps, _ in pipeline(text, voice):
26
- return ps
27
- return ''
 
 
28
 
29
- def generate_all(text, voice='af_heart', speed=1):
30
- pipeline = pipelines[voice[0]]
31
- pack = pipeline.load_voice(voice)
32
- first = True
33
- for _, ps, _ in pipeline(text, voice, speed):
34
- ref_s = pack[len(ps)-1]
35
- audio = models[False](ps, ref_s, speed)
36
- yield 24000, audio.numpy()
37
- if first:
38
- first = False
39
- yield 24000, torch.zeros(1).numpy()
40
 
41
- # Wisdom quotes
42
  QUOTES = [
43
  "When it is obvious that the goals cannot be reached, don't adjust the goals, adjust the action steps. - Confucius",
44
  "The man who moves a mountain begins by carrying away small stones. - Confucius",
45
  "Life is really simple, but we insist on making it complicated. - Confucius",
46
  "It does not matter how slowly you go as long as you do not stop. - Confucius",
 
 
47
  "Eating fruit daily provides essential vitamins, minerals and fiber that help maintain good health.",
48
  "An apple a day keeps the doctor away - a simple habit with profound health benefits.",
49
  "Fruits are nature's candy - sweet, nutritious, and vital for our wellbeing.",
50
  "Regular consumption of fruits boosts your immune system and reduces risk of chronic diseases.",
51
  "Colorful fruits on your plate mean a rainbow of nutrients for your body.",
52
- "The wisdom of health lies in eating seasonal fruits that nature provides us."
 
 
 
53
  ]
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  def get_random_quote():
56
  return random.choice(QUOTES)
57
 
@@ -77,79 +114,89 @@ Eating seasonal fruits connects us with nature's rhythm and ensures we get the f
77
 
78
  Remember: 'Nature's first green is gold' - and nowhere is this more true than in the vibrant colors of fruit that nourish our bodies daily."""
79
 
80
- CHOICES = {
81
- '🇺🇸 🚺 Heart ❤️': 'af_heart',
82
- '🇺🇸 🚺 Bella 🔥': 'af_bella',
83
- '🇺🇸 🚺 Nicole 🎧': 'af_nicole',
84
- '🇺🇸 🚺 Aoede': 'af_aoede',
85
- '🇺🇸 🚺 Kore': 'af_kore',
86
- '🇺🇸 🚺 Sarah': 'af_sarah',
87
- '🇺🇸 🚹 Michael': 'am_michael',
88
- '🇺🇸 🚹 Fenrir': 'am_fenrir',
89
- '🇺🇸 🚹 Puck': 'am_puck',
90
- '🇺🇸 🚹 Echo': 'am_echo',
91
- '🇬🇧 🚺 Emma': 'bf_emma',
92
- '🇬🇧 🚺 Isabella': 'bf_isabella',
93
- '🇬🇧 🚹 George': 'bm_george',
94
- '🇬🇧 🚹 Fable': 'bm_fable',
95
- }
96
- for v in CHOICES.values():
97
- pipelines[v[0]].load_voice(v)
98
-
99
- TOKEN_NOTE = '''
100
- 💡 Customize pronunciation with Markdown link syntax and /slashes/ like `[Kokoro](/kˈOkəɹO/)`
101
- 💬 To adjust intonation, try punctuation `;:,.!?—…"()""` or stress `ˈ` and `ˌ`
102
- ⬇️ Lower stress `[1 level](-1)` or `[2 levels](-2)`
103
- ⬆️ Raise stress 1 level `[or](+2)` 2 levels (only works on less stressed, usually short words)
104
- '''
105
-
106
- with gr.Blocks() as generate_tab:
107
- out_audio = gr.Audio(label='Output Audio', interactive=False, streaming=False, autoplay=True)
108
- generate_btn = gr.Button('Generate', variant='primary')
109
- with gr.Accordion('Output Tokens', open=True):
110
- out_ps = gr.Textbox(interactive=False, show_label=False, info='Tokens used to generate the audio, up to 510 context length.')
111
- tokenize_btn = gr.Button('Tokenize', variant='secondary')
112
- gr.Markdown(TOKEN_NOTE)
113
-
114
- STREAM_NOTE = ['⚠️ There is an unknown Gradio bug that might yield no audio the first time you click `Stream`.']
115
- STREAM_NOTE = '\n\n'.join(STREAM_NOTE)
116
-
117
- with gr.Blocks() as stream_tab:
118
- out_stream = gr.Audio(label='Output Audio Stream', interactive=False, streaming=True, autoplay=True)
119
- with gr.Row():
120
- stream_btn = gr.Button('Stream', variant='primary')
121
- stop_btn = gr.Button('Stop', variant='stop')
122
- with gr.Accordion('Note', open=True):
123
- gr.Markdown(STREAM_NOTE)
124
-
125
- BANNER_TEXT = '''
126
- # Kokoro TTS - Wisdom Speaker
127
-
128
- This demo uses the open-weight Kokoro TTS model to convert wisdom quotes into speech.
129
- '''
130
-
131
- with gr.Blocks() as app:
132
- with gr.Row():
133
- gr.Markdown(BANNER_TEXT, container=True)
134
  with gr.Row():
135
  with gr.Column():
136
- text = gr.Textbox(label='Input Text', info="Up to ~500 characters per Generate")
137
- with gr.Row():
138
- voice = gr.Dropdown(list(CHOICES.items()), value='af_heart', label='Voice', info='Select from various voices')
139
- speed = gr.Slider(minimum=0.5, maximum=2, value=1, step=0.1, label='Speed')
140
- random_btn = gr.Button('🎲 Random Wisdom Quote 💬', variant='secondary')
141
- with gr.Row():
142
- confucius_btn = gr.Button('🧙 Confucius Wisdom 📜', variant='secondary')
143
- fruit_btn = gr.Button('🍎 Fruit Wisdom 🍊', variant='secondary')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  with gr.Column():
145
- gr.TabbedInterface([generate_tab, stream_tab], ['Generate', 'Stream'])
146
- random_btn.click(fn=get_random_quote, inputs=[], outputs=[text])
147
- confucius_btn.click(fn=get_confucius, inputs=[], outputs=[text])
148
- fruit_btn.click(fn=get_fruit_wisdom, inputs=[], outputs=[text])
149
- generate_btn.click(fn=generate_first, inputs=[text, voice, speed], outputs=[out_audio, out_ps])
150
- tokenize_btn.click(fn=tokenize_first, inputs=[text, voice], outputs=[out_ps])
151
- stream_event = stream_btn.click(fn=generate_all, inputs=[text, voice, speed], outputs=[out_stream])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  stop_btn.click(fn=None, cancels=stream_event)
153
 
154
- if __name__ == '__main__':
155
- app.queue().launch()
 
 
 
 
1
  import os
2
  import random
3
  import torch
4
+ from kokoro import KModel, KPipeline
5
+ import gradio as gr
6
 
7
+ # App configuration
8
+ APP_TITLE = "✨ VoiceBloom ✨"
9
+ APP_SUBTITLE = "Transform wisdom into delightful speech!"
10
+ THEME = gr.themes.Soft(
11
+ primary_hue="indigo",
12
+ secondary_hue="purple",
13
+ ).set(
14
+ body_background_fill="linear-gradient(to right top, #d16ba5, #c777b9, #ba83ca, #aa8fd8, #9a9ae1, #8aa7ec, #79b3f4, #69bff8, #52cffe, #41dfff, #46eefa, #5ffbf1)",
15
+ button_primary_background_fill="linear-gradient(90deg, rgba(255,124,0,1) 0%, rgba(255,194,23,1) 100%)",
16
+ button_primary_background_fill_hover="linear-gradient(90deg, rgba(255,194,23,1) 0%, rgba(255,124,0,1) 100%)",
17
+ button_secondary_background_fill="linear-gradient(90deg, rgba(144,95,255,1) 0%, rgba(110,72,220,1) 100%)",
18
+ button_secondary_background_fill_hover="linear-gradient(90deg, rgba(110,72,220,1) 0%, rgba(144,95,255,1) 100%)",
19
+ block_background_fill="rgba(255, 255, 255, 0.8)",
20
+ block_shadow="0px 4px 12px rgba(0, 0, 0, 0.1)",
21
+ block_radius="12px",
22
+ )
23
+
24
+ # Initialize TTS models without GPU
25
+ models = {False: KModel().to('cpu').eval()}
26
  pipelines = {lang_code: KPipeline(lang_code=lang_code, model=False) for lang_code in 'ab'}
27
  pipelines['a'].g2p.lexicon.golds['kokoro'] = 'kˈOkəɹO'
28
  pipelines['b'].g2p.lexicon.golds['kokoro'] = 'kˈQkəɹQ'
29
 
30
+ # Define voice options with fun emoji indicators
31
+ VOICES = {
32
+ '✨ Sarah (US) ✨': 'af_sarah',
33
+ '🌟 Nicole (US) 🌟': 'af_nicole',
34
+ '💖 Heart (US) 💖': 'af_heart',
35
+ '🔮 Bella (US) 🔮': 'af_bella',
36
+ '🌈 Aoede (US) 🌈': 'af_aoede',
37
+ '🎵 Michael (US) 🎵': 'am_michael',
38
+ '🌠 Echo (US) 🌠': 'am_echo',
39
+ '🧙 Fenrir (US) 🧙': 'am_fenrir',
40
+ '🎭 Puck (US) 🎭': 'am_puck',
41
+ '👑 Emma (UK) 👑': 'bf_emma',
42
+ '🌹 Isabella (UK) 🌹': 'bf_isabella',
43
+ '🎩 George (UK) 🎩': 'bm_george',
44
+ '✨ Fable (UK) ✨': 'bm_fable',
45
+ }
46
 
47
+ # Preload voices
48
+ for v in VOICES.values():
49
+ pipelines[v[0]].load_voice(v)
 
 
 
 
 
 
 
 
50
 
51
+ # Inspirational quotes
52
  QUOTES = [
53
  "When it is obvious that the goals cannot be reached, don't adjust the goals, adjust the action steps. - Confucius",
54
  "The man who moves a mountain begins by carrying away small stones. - Confucius",
55
  "Life is really simple, but we insist on making it complicated. - Confucius",
56
  "It does not matter how slowly you go as long as you do not stop. - Confucius",
57
+ "Our greatest glory is not in never falling, but in rising every time we fall. - Confucius",
58
+ "Silence is a true friend who never betrays. - Confucius",
59
  "Eating fruit daily provides essential vitamins, minerals and fiber that help maintain good health.",
60
  "An apple a day keeps the doctor away - a simple habit with profound health benefits.",
61
  "Fruits are nature's candy - sweet, nutritious, and vital for our wellbeing.",
62
  "Regular consumption of fruits boosts your immune system and reduces risk of chronic diseases.",
63
  "Colorful fruits on your plate mean a rainbow of nutrients for your body.",
64
+ "The wisdom of health lies in eating seasonal fruits that nature provides us.",
65
+ "A journey of a thousand miles begins with a single step.",
66
+ "Happiness is not something ready-made. It comes from your own actions.",
67
+ "The best time to plant a tree was 20 years ago. The second best time is now.",
68
  ]
69
 
70
+ def generate_audio(text, voice, speed):
71
+ """Generate audio from text using the selected voice and speed"""
72
+ pipeline = pipelines[voice[0]]
73
+ pack = pipeline.load_voice(voice)
74
+
75
+ for _, ps, _ in pipeline(text, voice, speed):
76
+ ref_s = pack[len(ps)-1]
77
+ audio = models[False](ps, ref_s, speed)
78
+ return (24000, audio.numpy())
79
+
80
+ return None
81
+
82
+ def stream_audio(text, voice, speed):
83
+ """Stream audio for longer texts"""
84
+ pipeline = pipelines[voice[0]]
85
+ pack = pipeline.load_voice(voice)
86
+
87
+ for _, ps, _ in pipeline(text, voice, speed):
88
+ ref_s = pack[len(ps)-1]
89
+ audio = models[False](ps, ref_s, speed)
90
+ yield 24000, audio.numpy()
91
+
92
  def get_random_quote():
93
  return random.choice(QUOTES)
94
 
 
114
 
115
  Remember: 'Nature's first green is gold' - and nowhere is this more true than in the vibrant colors of fruit that nourish our bodies daily."""
116
 
117
+ # Build the UI
118
+ with gr.Blocks(theme=THEME) as app:
119
+ gr.Markdown(f"# {APP_TITLE}")
120
+ gr.Markdown(f"### {APP_SUBTITLE}")
121
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  with gr.Row():
123
  with gr.Column():
124
+ # Input section with colorful styling
125
+ with gr.Box():
126
+ gr.Markdown("### 📝 What would you like me to say?")
127
+ text_input = gr.Textbox(
128
+ label="",
129
+ placeholder="Type your text here or choose a wisdom quote below...",
130
+ lines=5
131
+ )
132
+
133
+ with gr.Row():
134
+ random_btn = gr.Button("🎲 Random Quote", variant="secondary")
135
+ confucius_btn = gr.Button("🧙 Confucius Wisdom", variant="secondary")
136
+ fruit_btn = gr.Button("🍎 Fruit Wisdom", variant="secondary")
137
+
138
+ # Voice & Speed controls
139
+ with gr.Box():
140
+ gr.Markdown("### 🎤 Choose Your Voice")
141
+ voice_dropdown = gr.Dropdown(
142
+ list(VOICES.items()),
143
+ value=list(VOICES.items())[0][1],
144
+ label="Voice Style"
145
+ )
146
+
147
+ gr.Markdown("### ⏱️ Adjust Speaking Speed")
148
+ speed_slider = gr.Slider(
149
+ minimum=0.5,
150
+ maximum=2.0,
151
+ value=1.0,
152
+ step=0.1,
153
+ label="Speed"
154
+ )
155
+
156
+ # Output section
157
  with gr.Column():
158
+ with gr.Tabs():
159
+ with gr.TabItem("🔊 Listen"):
160
+ audio_output = gr.Audio(
161
+ label="Your VoiceBloom Creation",
162
+ interactive=False,
163
+ autoplay=True
164
+ )
165
+ generate_btn = gr.Button("🎵 Generate Audio", variant="primary", size="lg")
166
+
167
+ with gr.TabItem("📻 Stream Longer Text"):
168
+ stream_output = gr.Audio(
169
+ label="Streaming Audio",
170
+ interactive=False,
171
+ streaming=True,
172
+ autoplay=True
173
+ )
174
+ with gr.Row():
175
+ stream_btn = gr.Button("▶️ Start Streaming", variant="primary")
176
+ stop_btn = gr.Button("⏹️ Stop", variant="stop")
177
+
178
+ # Footer
179
+ gr.Markdown("---")
180
+ gr.Markdown("### ✨ VoiceBloom - Bringing wisdom to life through the art of voice ✨")
181
+
182
+ # Set up event handlers
183
+ random_btn.click(fn=get_random_quote, outputs=[text_input])
184
+ confucius_btn.click(fn=get_confucius, outputs=[text_input])
185
+ fruit_btn.click(fn=get_fruit_wisdom, outputs=[text_input])
186
+
187
+ generate_btn.click(
188
+ fn=generate_audio,
189
+ inputs=[text_input, voice_dropdown, speed_slider],
190
+ outputs=[audio_output]
191
+ )
192
+
193
+ stream_event = stream_btn.click(
194
+ fn=stream_audio,
195
+ inputs=[text_input, voice_dropdown, speed_slider],
196
+ outputs=[stream_output]
197
+ )
198
+
199
  stop_btn.click(fn=None, cancels=stream_event)
200
 
201
+ if __name__ == "__main__":
202
+ app.launch()