Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -53,6 +53,8 @@ def generate_podcast_script(api_key, prompt, uploaded_file, duration, num_hosts)
|
|
53 |
|
54 |
Format: {'Monologue' if num_hosts == 1 else 'Alternating dialogue'} without speaker labels.
|
55 |
Separate {'paragraphs' if num_hosts == 1 else 'lines'} with blank lines.
|
|
|
|
|
56 |
|
57 |
Use emotion tags in angle brackets: <laugh>, <sigh>, <chuckle>, <cough>, <sniffle>, <groan>, <yawn>, <gasp>.
|
58 |
|
@@ -131,6 +133,7 @@ def redistribute_codes(code_list, snac_model):
|
|
131 |
audio_hat = snac_model.decode(codes)
|
132 |
return audio_hat.detach().squeeze().cpu().numpy() # Always return CPU numpy array
|
133 |
|
|
|
134 |
@spaces.GPU()
|
135 |
def generate_speech(text, voice1, voice2, temperature, top_p, repetition_penalty, max_new_tokens, num_hosts, progress=gr.Progress()):
|
136 |
if not text.strip():
|
@@ -145,7 +148,11 @@ def generate_speech(text, voice1, voice2, temperature, top_p, repetition_penalty
|
|
145 |
if not line.strip():
|
146 |
continue
|
147 |
|
148 |
-
|
|
|
|
|
|
|
|
|
149 |
input_ids, attention_mask = process_prompt(line, voice, tokenizer, device)
|
150 |
|
151 |
progress(0.3, f"Generating speech tokens for line {i+1}...")
|
@@ -172,6 +179,11 @@ def generate_speech(text, voice1, voice2, temperature, top_p, repetition_penalty
|
|
172 |
# Concatenate all audio samples
|
173 |
final_audio = np.concatenate(audio_samples)
|
174 |
|
|
|
|
|
|
|
|
|
|
|
175 |
return (24000, final_audio)
|
176 |
except Exception as e:
|
177 |
print(f"Error generating speech: {e}")
|
@@ -188,7 +200,6 @@ with gr.Blocks(title="Orpheus Text-to-Speech") as demo:
|
|
188 |
generate_script_btn = gr.Button("Generate Podcast Script")
|
189 |
|
190 |
with gr.Column(scale=2):
|
191 |
-
script_output = gr.Textbox(label="Generated Script", lines=10)
|
192 |
voice1 = gr.Dropdown(
|
193 |
choices=VOICES,
|
194 |
value="tara",
|
@@ -229,6 +240,7 @@ with gr.Blocks(title="Orpheus Text-to-Speech") as demo:
|
|
229 |
clear_btn = gr.Button("Clear")
|
230 |
|
231 |
with gr.Column(scale=2):
|
|
|
232 |
audio_output = gr.Audio(label="Generated Speech", type="numpy")
|
233 |
|
234 |
generate_script_btn.click(
|
|
|
53 |
|
54 |
Format: {'Monologue' if num_hosts == 1 else 'Alternating dialogue'} without speaker labels.
|
55 |
Separate {'paragraphs' if num_hosts == 1 else 'lines'} with blank lines.
|
56 |
+
|
57 |
+
only provide the dialog for text to speech
|
58 |
|
59 |
Use emotion tags in angle brackets: <laugh>, <sigh>, <chuckle>, <cough>, <sniffle>, <groan>, <yawn>, <gasp>.
|
60 |
|
|
|
133 |
audio_hat = snac_model.decode(codes)
|
134 |
return audio_hat.detach().squeeze().cpu().numpy() # Always return CPU numpy array
|
135 |
|
136 |
+
@spaces.GPU()
|
137 |
@spaces.GPU()
|
138 |
def generate_speech(text, voice1, voice2, temperature, top_p, repetition_penalty, max_new_tokens, num_hosts, progress=gr.Progress()):
|
139 |
if not text.strip():
|
|
|
148 |
if not line.strip():
|
149 |
continue
|
150 |
|
151 |
+
if num_hosts == "2":
|
152 |
+
voice = voice1 if i % 2 == 0 else voice2
|
153 |
+
else:
|
154 |
+
voice = voice1
|
155 |
+
|
156 |
input_ids, attention_mask = process_prompt(line, voice, tokenizer, device)
|
157 |
|
158 |
progress(0.3, f"Generating speech tokens for line {i+1}...")
|
|
|
179 |
# Concatenate all audio samples
|
180 |
final_audio = np.concatenate(audio_samples)
|
181 |
|
182 |
+
# Add a check for 15-second limitation
|
183 |
+
max_samples = 24000 * 15 # 15 seconds at 24kHz sample rate
|
184 |
+
if len(final_audio) > max_samples:
|
185 |
+
final_audio = final_audio[:max_samples]
|
186 |
+
|
187 |
return (24000, final_audio)
|
188 |
except Exception as e:
|
189 |
print(f"Error generating speech: {e}")
|
|
|
200 |
generate_script_btn = gr.Button("Generate Podcast Script")
|
201 |
|
202 |
with gr.Column(scale=2):
|
|
|
203 |
voice1 = gr.Dropdown(
|
204 |
choices=VOICES,
|
205 |
value="tara",
|
|
|
240 |
clear_btn = gr.Button("Clear")
|
241 |
|
242 |
with gr.Column(scale=2):
|
243 |
+
script_output = gr.Textbox(label="Generated Script", lines=10)
|
244 |
audio_output = gr.Audio(label="Generated Speech", type="numpy")
|
245 |
|
246 |
generate_script_btn.click(
|