podcast-generator

Sleeping

App Files Community

bluenevus committed on Apr 16

Commit

d9cc1e0

verified ·

1 Parent(s): 3450cf6

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -2

app.py CHANGED Viewed

@@ -53,6 +53,8 @@ def generate_podcast_script(api_key, prompt, uploaded_file, duration, num_hosts)
         Format: {'Monologue' if num_hosts == 1 else 'Alternating dialogue'} without speaker labels.
         Separate {'paragraphs' if num_hosts == 1 else 'lines'} with blank lines.
         Use emotion tags in angle brackets: <laugh>, <sigh>, <chuckle>, <cough>, <sniffle>, <groan>, <yawn>, <gasp>.
@@ -131,6 +133,7 @@ def redistribute_codes(code_list, snac_model):
     audio_hat = snac_model.decode(codes)
     return audio_hat.detach().squeeze().cpu().numpy()  # Always return CPU numpy array
 @spaces.GPU()
 def generate_speech(text, voice1, voice2, temperature, top_p, repetition_penalty, max_new_tokens, num_hosts, progress=gr.Progress()):
     if not text.strip():
@@ -145,7 +148,11 @@ def generate_speech(text, voice1, voice2, temperature, top_p, repetition_penalty
             if not line.strip():
                 continue
-            voice = voice1 if i % 2 == 0 else voice2
             input_ids, attention_mask = process_prompt(line, voice, tokenizer, device)
             progress(0.3, f"Generating speech tokens for line {i+1}...")
@@ -172,6 +179,11 @@ def generate_speech(text, voice1, voice2, temperature, top_p, repetition_penalty
         # Concatenate all audio samples
         final_audio = np.concatenate(audio_samples)
         return (24000, final_audio)
     except Exception as e:
         print(f"Error generating speech: {e}")
@@ -188,7 +200,6 @@ with gr.Blocks(title="Orpheus Text-to-Speech") as demo:
             generate_script_btn = gr.Button("Generate Podcast Script")
         with gr.Column(scale=2):
-            script_output = gr.Textbox(label="Generated Script", lines=10)
             voice1 = gr.Dropdown(
                 choices=VOICES,
                 value="tara",
@@ -229,6 +240,7 @@ with gr.Blocks(title="Orpheus Text-to-Speech") as demo:
                 clear_btn = gr.Button("Clear")
         with gr.Column(scale=2):
             audio_output = gr.Audio(label="Generated Speech", type="numpy")
     generate_script_btn.click(

         Format: {'Monologue' if num_hosts == 1 else 'Alternating dialogue'} without speaker labels.
         Separate {'paragraphs' if num_hosts == 1 else 'lines'} with blank lines.
+        only provide the dialog for text to speech
         Use emotion tags in angle brackets: <laugh>, <sigh>, <chuckle>, <cough>, <sniffle>, <groan>, <yawn>, <gasp>.
     audio_hat = snac_model.decode(codes)
     return audio_hat.detach().squeeze().cpu().numpy()  # Always return CPU numpy array
+@spaces.GPU()
 @spaces.GPU()
 def generate_speech(text, voice1, voice2, temperature, top_p, repetition_penalty, max_new_tokens, num_hosts, progress=gr.Progress()):
     if not text.strip():
             if not line.strip():
                 continue
+            if num_hosts == "2":
+                voice = voice1 if i % 2 == 0 else voice2
+            else:
+                voice = voice1
             input_ids, attention_mask = process_prompt(line, voice, tokenizer, device)
             progress(0.3, f"Generating speech tokens for line {i+1}...")
         # Concatenate all audio samples
         final_audio = np.concatenate(audio_samples)
+        # Add a check for 15-second limitation
+        max_samples = 24000 * 15  # 15 seconds at 24kHz sample rate
+        if len(final_audio) > max_samples:
+            final_audio = final_audio[:max_samples]
         return (24000, final_audio)
     except Exception as e:
         print(f"Error generating speech: {e}")
             generate_script_btn = gr.Button("Generate Podcast Script")
         with gr.Column(scale=2):
             voice1 = gr.Dropdown(
                 choices=VOICES,
                 value="tara",
                 clear_btn = gr.Button("Clear")
         with gr.Column(scale=2):
+            script_output = gr.Textbox(label="Generated Script", lines=10)
             audio_output = gr.Audio(label="Generated Speech", type="numpy")
     generate_script_btn.click(