bluenevus committed on
Commit
d9cc1e0
·
verified ·
1 Parent(s): 3450cf6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -2
app.py CHANGED
@@ -53,6 +53,8 @@ def generate_podcast_script(api_key, prompt, uploaded_file, duration, num_hosts)
53
 
54
  Format: {'Monologue' if num_hosts == 1 else 'Alternating dialogue'} without speaker labels.
55
  Separate {'paragraphs' if num_hosts == 1 else 'lines'} with blank lines.
 
 
56
 
57
  Use emotion tags in angle brackets: <laugh>, <sigh>, <chuckle>, <cough>, <sniffle>, <groan>, <yawn>, <gasp>.
58
 
@@ -131,6 +133,7 @@ def redistribute_codes(code_list, snac_model):
131
  audio_hat = snac_model.decode(codes)
132
  return audio_hat.detach().squeeze().cpu().numpy() # Always return CPU numpy array
133
 
 
134
  @spaces.GPU()
135
  def generate_speech(text, voice1, voice2, temperature, top_p, repetition_penalty, max_new_tokens, num_hosts, progress=gr.Progress()):
136
  if not text.strip():
@@ -145,7 +148,11 @@ def generate_speech(text, voice1, voice2, temperature, top_p, repetition_penalty
145
  if not line.strip():
146
  continue
147
 
148
- voice = voice1 if i % 2 == 0 else voice2
 
 
 
 
149
  input_ids, attention_mask = process_prompt(line, voice, tokenizer, device)
150
 
151
  progress(0.3, f"Generating speech tokens for line {i+1}...")
@@ -172,6 +179,11 @@ def generate_speech(text, voice1, voice2, temperature, top_p, repetition_penalty
172
  # Concatenate all audio samples
173
  final_audio = np.concatenate(audio_samples)
174
 
 
 
 
 
 
175
  return (24000, final_audio)
176
  except Exception as e:
177
  print(f"Error generating speech: {e}")
@@ -188,7 +200,6 @@ with gr.Blocks(title="Orpheus Text-to-Speech") as demo:
188
  generate_script_btn = gr.Button("Generate Podcast Script")
189
 
190
  with gr.Column(scale=2):
191
- script_output = gr.Textbox(label="Generated Script", lines=10)
192
  voice1 = gr.Dropdown(
193
  choices=VOICES,
194
  value="tara",
@@ -229,6 +240,7 @@ with gr.Blocks(title="Orpheus Text-to-Speech") as demo:
229
  clear_btn = gr.Button("Clear")
230
 
231
  with gr.Column(scale=2):
 
232
  audio_output = gr.Audio(label="Generated Speech", type="numpy")
233
 
234
  generate_script_btn.click(
 
53
 
54
  Format: {'Monologue' if num_hosts == 1 else 'Alternating dialogue'} without speaker labels.
55
  Separate {'paragraphs' if num_hosts == 1 else 'lines'} with blank lines.
56
+
57
+ only provide the dialog for text to speech
58
 
59
  Use emotion tags in angle brackets: <laugh>, <sigh>, <chuckle>, <cough>, <sniffle>, <groan>, <yawn>, <gasp>.
60
 
 
133
  audio_hat = snac_model.decode(codes)
134
  return audio_hat.detach().squeeze().cpu().numpy() # Always return CPU numpy array
135
 
136
+ @spaces.GPU()
137
  @spaces.GPU()
138
  def generate_speech(text, voice1, voice2, temperature, top_p, repetition_penalty, max_new_tokens, num_hosts, progress=gr.Progress()):
139
  if not text.strip():
 
148
  if not line.strip():
149
  continue
150
 
151
+ if num_hosts == "2":
152
+ voice = voice1 if i % 2 == 0 else voice2
153
+ else:
154
+ voice = voice1
155
+
156
  input_ids, attention_mask = process_prompt(line, voice, tokenizer, device)
157
 
158
  progress(0.3, f"Generating speech tokens for line {i+1}...")
 
179
  # Concatenate all audio samples
180
  final_audio = np.concatenate(audio_samples)
181
 
182
+ # Add a check for 15-second limitation
183
+ max_samples = 24000 * 15 # 15 seconds at 24kHz sample rate
184
+ if len(final_audio) > max_samples:
185
+ final_audio = final_audio[:max_samples]
186
+
187
  return (24000, final_audio)
188
  except Exception as e:
189
  print(f"Error generating speech: {e}")
 
200
  generate_script_btn = gr.Button("Generate Podcast Script")
201
 
202
  with gr.Column(scale=2):
 
203
  voice1 = gr.Dropdown(
204
  choices=VOICES,
205
  value="tara",
 
240
  clear_btn = gr.Button("Clear")
241
 
242
  with gr.Column(scale=2):
243
+ script_output = gr.Textbox(label="Generated Script", lines=10)
244
  audio_output = gr.Audio(label="Generated Speech", type="numpy")
245
 
246
  generate_script_btn.click(