Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -58,7 +58,7 @@ def generate_podcast_script(api_key, prompt, uploaded_file, duration, num_hosts)
|
|
58 |
Example: "I can't believe I stayed up all night <yawn> only to find out the meeting was canceled <groan>."
|
59 |
|
60 |
Ensure content flows naturally and stays on topic. Match the script length to {duration} minutes.
|
61 |
-
{'Make sure the script is a monologue for one person.' if num_hosts == 1 else 'Ensure the dialogue alternates between two distinct voices.'}
|
62 |
"""
|
63 |
|
64 |
response = model.generate_content(prompt)
|
@@ -131,35 +131,47 @@ def redistribute_codes(code_list, snac_model):
|
|
131 |
return audio_hat.detach().squeeze().cpu().numpy() # Always return CPU numpy array
|
132 |
|
133 |
@spaces.GPU()
|
134 |
-
def generate_speech(text,
|
135 |
if not text.strip():
|
136 |
return None
|
137 |
|
138 |
try:
|
139 |
progress(0.1, "Processing text...")
|
140 |
-
|
|
|
141 |
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
158 |
|
159 |
-
|
160 |
-
|
161 |
|
162 |
-
return (24000,
|
163 |
except Exception as e:
|
164 |
print(f"Error generating speech: {e}")
|
165 |
return None
|
@@ -176,11 +188,17 @@ with gr.Blocks(title="Orpheus Text-to-Speech") as demo:
|
|
176 |
|
177 |
with gr.Column(scale=2):
|
178 |
script_output = gr.Textbox(label="Generated Script", lines=10)
|
179 |
-
|
180 |
choices=VOICES,
|
181 |
value="tara",
|
182 |
-
label="Voice",
|
183 |
-
info="Select the voice for speech generation"
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
)
|
185 |
|
186 |
with gr.Accordion("Advanced Settings", open=False):
|
@@ -220,7 +238,7 @@ with gr.Blocks(title="Orpheus Text-to-Speech") as demo:
|
|
220 |
|
221 |
submit_btn.click(
|
222 |
fn=generate_speech,
|
223 |
-
inputs=[script_output,
|
224 |
outputs=audio_output
|
225 |
)
|
226 |
|
|
|
58 |
Example: "I can't believe I stayed up all night <yawn> only to find out the meeting was canceled <groan>."
|
59 |
|
60 |
Ensure content flows naturally and stays on topic. Match the script length to {duration} minutes.
|
61 |
+
{'Make sure the script is a monologue for one person.' if num_hosts == 1 else 'Ensure the dialogue alternates between two distinct voices, with one speaking on odd-numbered lines and the other on even-numbered lines.'}
|
62 |
"""
|
63 |
|
64 |
response = model.generate_content(prompt)
|
|
|
131 |
return audio_hat.detach().squeeze().cpu().numpy() # Always return CPU numpy array
|
132 |
|
133 |
@spaces.GPU()
def generate_speech(text, voice1, voice2, temperature, top_p, repetition_penalty, max_new_tokens, num_hosts, progress=gr.Progress()):
    """Synthesize a script line by line and return it as a single audio clip.

    Each non-blank line of ``text`` is turned into speech separately and the
    resulting clips are concatenated in script order.

    Args:
        text: The script; one utterance per line. Blank lines are ignored.
        voice1: Voice used for the 1st, 3rd, 5th, ... spoken line, and for
            every line when ``num_hosts == 1``.
        voice2: Voice used for the 2nd, 4th, 6th, ... spoken line when there
            is more than one host.
        temperature: Forwarded to ``model.generate``.
        top_p: Forwarded to ``model.generate``.
        repetition_penalty: Forwarded to ``model.generate``.
        max_new_tokens: Forwarded to ``model.generate``; applies per line.
        num_hosts: Number of speakers in the script. ``1`` disables voice
            alternation.
        progress: Progress callback invoked as ``progress(fraction, message)``.

    Returns:
        ``(24000, audio)`` where ``audio`` is the concatenation of the
        per-line arrays returned by ``redistribute_codes`` (24000 is
        presumably the sample rate in Hz -- confirm against the audio
        component), or ``None`` when ``text`` is empty/blank or generation
        fails.
    """
    if not text or not text.strip():
        return None

    try:
        progress(0.1, "Processing text...")

        # Filter blank lines *before* numbering: alternating on the raw line
        # index would give every utterance the same voice whenever the script
        # separates turns with an empty line.
        spoken_lines = [line for line in text.split('\n') if line.strip()]
        single_host = num_hosts == 1
        # Share of the progress bar each line owns (the first 10% is setup),
        # so the bar advances monotonically instead of resetting per line.
        span = 0.9 / len(spoken_lines)
        audio_samples = []

        for i, line in enumerate(spoken_lines):
            voice = voice1 if single_host or i % 2 == 0 else voice2
            start = 0.1 + i * span
            input_ids, attention_mask = process_prompt(line, voice, tokenizer, device)

            progress(start, f"Generating speech tokens for line {i+1}...")
            with torch.no_grad():
                generated_ids = model.generate(
                    input_ids,
                    attention_mask=attention_mask,
                    do_sample=True,
                    temperature=temperature,
                    top_p=top_p,
                    repetition_penalty=repetition_penalty,
                    max_new_tokens=max_new_tokens,
                    num_return_sequences=1,
                    eos_token_id=128258,
                )

            progress(start + 0.6 * span, f"Processing speech tokens for line {i+1}...")
            code_list = parse_output(generated_ids)

            progress(start + 0.8 * span, f"Converting line {i+1} to audio...")
            line_audio = redistribute_codes(code_list, snac_model)
            audio_samples.append(line_audio)

        # Concatenate all audio samples
        final_audio = np.concatenate(audio_samples)

        return (24000, final_audio)
    except Exception as e:
        # UI boundary: report the failure and return no audio rather than
        # letting the exception propagate to the caller.
        print(f"Error generating speech: {e}")
        return None
|
|
|
188 |
|
189 |
with gr.Column(scale=2):
|
190 |
script_output = gr.Textbox(label="Generated Script", lines=10)
|
191 |
+
voice1 = gr.Dropdown(
|
192 |
choices=VOICES,
|
193 |
value="tara",
|
194 |
+
label="Voice 1",
|
195 |
+
info="Select the first voice for speech generation"
|
196 |
+
)
|
197 |
+
voice2 = gr.Dropdown(
|
198 |
+
choices=VOICES,
|
199 |
+
value="dan",
|
200 |
+
label="Voice 2",
|
201 |
+
info="Select the second voice for speech generation"
|
202 |
)
|
203 |
|
204 |
with gr.Accordion("Advanced Settings", open=False):
|
|
|
238 |
|
239 |
submit_btn.click(
|
240 |
fn=generate_speech,
|
241 |
+
inputs=[script_output, voice1, voice2, temperature, top_p, repetition_penalty, max_new_tokens, num_hosts],
|
242 |
outputs=audio_output
|
243 |
)
|
244 |
|