freddyaboulton (HF Staff) committed
Commit 8ee549b · 1 Parent(s): 988d6c6
Files changed (1):
  1. app.py (+19 -3)
app.py CHANGED
@@ -17,6 +17,21 @@ from pydub import AudioSegment
 from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
 from transformers.generation.streamers import BaseStreamer
 
+account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
+auth_token = os.environ.get("TWILIO_AUTH_TOKEN")
+
+if account_sid and auth_token:
+    client = Client(account_sid, auth_token)
+
+    token = client.tokens.create()
+
+    rtc_configuration = {
+        "iceServers": token.ice_servers,
+        "iceTransportPolicy": "relay",
+    }
+else:
+    rtc_configuration = None
+
 device = "cuda:0" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
 torch_dtype = torch.float16 if device != "cpu" else torch.float32
 
@@ -263,7 +278,7 @@ def generate_base(text, description, play_steps_in_s=2.0):
 
     for new_audio in streamer:
         print(f"Sample of length {round(new_audio.shape[0] / sampling_rate, 2)} seconds ready")
-        yield numpy_to_mp3(new_audio, sampling_rate=sampling_rate)
+        yield (sampling_rate, new_audio)
 
 
 css = """
@@ -343,12 +358,13 @@ with gr.Blocks(css=css) as block:
             play_seconds = gr.Slider(0.2, 3.0, value=0.2, step=0.2, label="Streaming interval in seconds", info="Lower = shorter chunks, lower latency, more codec steps")
             run_button = gr.Button("Generate Audio", variant="primary")
         with gr.Column():
-            audio_out = gr.Audio(label="Parler-TTS generation", format="mp3", elem_id="audio_out", streaming=True, autoplay=True)
+            audio_out = WebRTC(label="Parler-TTS generation", modality="audio", mode="receive",
+                               rtc_configuration=rtc_configuration)
 
     inputs = [input_text, description, play_seconds]
     outputs = [audio_out]
     gr.Examples(examples=examples, fn=generate_base, inputs=inputs, outputs=outputs, cache_examples=False)
-    run_button.click(fn=generate_base, inputs=inputs, outputs=outputs, queue=True)
+    audio_out.stream(fn=generate_base, inputs=inputs, outputs=audio_out, trigger=run_button.click)
 
     gr.HTML(
         """