Bmo411 committed on
Commit
2690696
verified
1 Parent(s): bed59c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -21
app.py CHANGED
@@ -6,16 +6,14 @@ import torch
6
  from huggingface_hub import hf_hub_download
7
  from speechbrain.inference.TTS import Tacotron2
8
 
9
- # Cargar Tacotron2
10
  tacotron2 = Tacotron2.from_hparams(
11
  source="speechbrain/tts-tacotron2-ljspeech",
12
  savedir="tmpdir_tts",
13
  run_opts={"device": "cpu"}
14
  )
15
 
16
- # Cargar tu modelo generator.keras
17
- # Cargar tu generator.keras desde HuggingFace
18
- # Cargar tu modelo generator.keras desde HuggingFace
19
  model_path = hf_hub_download(
20
  repo_id="Bmo411/WGAN",
21
  filename="generator_epoch_3500.keras"
@@ -23,35 +21,36 @@ model_path = hf_hub_download(
23
 
24
  generator = keras.models.load_model(model_path, compile=False)
25
 
26
- # Función de generación
27
  def text_to_audio(text):
28
- # 1. Convertir texto a mel-spectrograma
29
  mel_output, _, _ = tacotron2.encode_text(text)
30
  mel = mel_output.detach().cpu().numpy().astype(np.float32) # (80, frames)
31
-
32
- # 2. Preparar para generator
33
- mel_input = np.expand_dims(mel,axis=0) # (1, 80, frames, 1)
34
- mel_input = np.expand_dims(mel_input,axis=-1) # (1, 80, frames, 1)
35
 
36
- # 3. Usar generator para generar audio
 
 
 
 
37
  fake_audio = generator(mel_input, training=False)
38
- fake_audio = np.squeeze(fake_audio, axis=-1) # (samples,)
 
39
 
40
- # 4. Asegurar que esté en [-1, 1] para audio
41
  fake_audio = np.clip(fake_audio, -1.0, 1.0)
42
 
43
- # 5. Devolver audio como (numpy_array, sample_rate)
44
- return fake_audio, 8000 # tu modelo está entrenado en 8 kHz, ¿verdad?
45
 
46
- # Interfaz Gradio
47
  interface = gr.Interface(
48
  fn=text_to_audio,
49
- inputs=gr.Textbox(lines=1, placeholder="Escribe un número (ej. nine)"),
50
  outputs=gr.Audio(type="numpy", label="Audio generado"),
51
- title="Demo de TTS con Tacotron2 + Generator",
52
- description="Convierte texto en audio usando Tacotron2 + tu modelo generator."
53
  )
54
 
55
- # Lanzar app
56
  if __name__ == "__main__":
57
- interface.launch()
 
6
  from huggingface_hub import hf_hub_download
7
  from speechbrain.inference.TTS import Tacotron2
8
 
9
+ # Cargar modelo Tacotron2
10
  tacotron2 = Tacotron2.from_hparams(
11
  source="speechbrain/tts-tacotron2-ljspeech",
12
  savedir="tmpdir_tts",
13
  run_opts={"device": "cpu"}
14
  )
15
 
16
+ # Descargar y cargar el modelo Generator entrenado
 
 
17
  model_path = hf_hub_download(
18
  repo_id="Bmo411/WGAN",
19
  filename="generator_epoch_3500.keras"
 
21
 
22
  generator = keras.models.load_model(model_path, compile=False)
23
 
24
+ # Función para convertir texto a audio
25
  def text_to_audio(text):
26
+ # Convertir texto a mel-spectrograma
27
  mel_output, _, _ = tacotron2.encode_text(text)
28
  mel = mel_output.detach().cpu().numpy().astype(np.float32) # (80, frames)
 
 
 
 
29
 
30
+ # Preparar mel para el generador
31
+ mel_input = np.expand_dims(mel, axis=0) # (1, 80, frames)
32
+ mel_input = np.expand_dims(mel_input, axis=-1) # (1, 80, frames, 1)
33
+
34
+ # Generar audio
35
  fake_audio = generator(mel_input, training=False)
36
+ fake_audio = tf.squeeze(fake_audio, axis=0).numpy() # (samples, 1)
37
+ fake_audio = np.squeeze(fake_audio, axis=-1) # (samples,)
38
 
39
+ # Asegurar que esté en rango [-1, 1]
40
  fake_audio = np.clip(fake_audio, -1.0, 1.0)
41
 
42
+ # Retornar audio y sample rate
43
+ return fake_audio, 8000 # Asumiendo salida a 8kHz
44
 
45
+ # Crear interfaz en Gradio
46
  interface = gr.Interface(
47
  fn=text_to_audio,
48
+ inputs=gr.Textbox(lines=1, placeholder="Escribe algo (ej. 'nine')"),
49
  outputs=gr.Audio(type="numpy", label="Audio generado"),
50
+ title="Demo de TTS con Tacotron2 + Generador",
51
+ description="Convierte texto en audio usando Tacotron2 + tu modelo Generator entrenado."
52
  )
53
 
54
+ # Lanzar aplicación
55
  if __name__ == "__main__":
56
+ interface.launch()