Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,17 +1,75 @@
|
|
1 |
-
import os, re, math, uuid, time, shutil, logging, tempfile, threading, requests, asyncio, numpy as np
|
2 |
from datetime import datetime, timedelta
|
3 |
from collections import Counter
|
4 |
|
5 |
import gradio as gr
|
6 |
import torch
|
|
|
|
|
|
|
7 |
from transformers import GPT2Tokenizer, GPT2LMHeadModel
|
8 |
from keybert import KeyBERT
|
9 |
-
import edge_tts
|
10 |
from moviepy.editor import (
|
11 |
VideoFileClip, AudioFileClip, concatenate_videoclips, concatenate_audioclips,
|
12 |
CompositeAudioClip, AudioClip, TextClip, CompositeVideoClip, VideoClip
|
13 |
)
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
# ------------------- Configuración & Globals -------------------
|
16 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
17 |
logger = logging.getLogger(__name__)
|
@@ -20,15 +78,10 @@ PEXELS_API_KEY = os.getenv("PEXELS_API_KEY")
|
|
20 |
if not PEXELS_API_KEY:
|
21 |
raise RuntimeError("Debes definir PEXELS_API_KEY en 'Settings' -> 'Variables & secrets'")
|
22 |
|
23 |
-
tokenizer, gpt2_model, kw_model = None, None, None
|
24 |
RESULTS_DIR = "video_results"
|
25 |
os.makedirs(RESULTS_DIR, exist_ok=True)
|
26 |
-
TASKS = {}
|
27 |
-
|
28 |
-
SPANISH_VOICES = [
|
29 |
-
"es-ES-ElviraNeural", "es-ES-AlvaroNeural", "es-MX-DaliaNeural", "es-MX-JorgeNeural",
|
30 |
-
"es-AR-ElenaNeural", "es-AR-TomasNeural", "es-CO-SalomeNeural", "es-CO-GonzaloNeural"
|
31 |
-
]
|
32 |
|
33 |
# ------------------- Carga Perezosa de Modelos -------------------
|
34 |
def get_tokenizer():
|
@@ -50,12 +103,17 @@ def get_kw_model():
|
|
50 |
global kw_model
|
51 |
if kw_model is None:
|
52 |
logger.info("Cargando modelo KeyBERT (primera vez)...")
|
53 |
-
kw_model = KeyBERT("
|
54 |
return kw_model
|
55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
# ------------------- Funciones del Pipeline de Vídeo -------------------
|
57 |
def update_task_progress(task_id, message):
|
58 |
-
"""Actualiza el log de progreso para una tarea."""
|
59 |
if task_id in TASKS:
|
60 |
TASKS[task_id]['progress_log'] = message
|
61 |
logger.info(f"[{task_id}] {message}")
|
@@ -73,9 +131,13 @@ def gpt2_script(prompt: str) -> str:
|
|
73 |
text = local_tokenizer.decode(outputs[0], skip_special_tokens=True)
|
74 |
return text.split("sobre:")[-1].strip()
|
75 |
|
76 |
-
|
77 |
-
|
78 |
-
|
|
|
|
|
|
|
|
|
79 |
|
80 |
def keywords(text: str) -> list[str]:
|
81 |
local_kw_model = get_kw_model()
|
@@ -132,24 +194,24 @@ def make_grain_clip(size: tuple[int, int], duration: float):
|
|
132 |
return np.repeat(noise, 3, axis=2)
|
133 |
return VideoClip(make_frame, duration=duration).set_opacity(0.15)
|
134 |
|
135 |
-
def build_video(script_text: str, generate_script_flag: bool,
|
136 |
tmp_dir = tempfile.mkdtemp()
|
137 |
try:
|
138 |
update_task_progress(task_id, "Paso 1/7: Generando guion...")
|
139 |
script = gpt2_script(script_text) if generate_script_flag else script_text.strip()
|
140 |
|
141 |
-
update_task_progress(task_id, f"Paso 2/7: Creando audio con
|
142 |
-
voice_path = os.path.join(tmp_dir, "voice.
|
143 |
-
|
144 |
voice_clip = AudioFileClip(voice_path)
|
145 |
video_duration = voice_clip.duration
|
146 |
if video_duration < 1: raise ValueError("El audio generado es demasiado corto.")
|
147 |
|
148 |
-
update_task_progress(task_id, "Paso 3/7: Buscando clips
|
149 |
video_paths = []
|
150 |
kws = keywords(script)
|
151 |
for i, kw in enumerate(kws):
|
152 |
-
update_task_progress(task_id, f"Paso 3/7: Buscando
|
153 |
if len(video_paths) >= 8: break
|
154 |
for video_data in pexels_search(kw, 2):
|
155 |
best_file = max(video_data.get("video_files", []), key=lambda f: f.get("width", 0))
|
@@ -159,7 +221,7 @@ def build_video(script_text: str, generate_script_flag: bool, voice: str, music_
|
|
159 |
if len(video_paths) >= 8: break
|
160 |
if not video_paths: raise RuntimeError("No se encontraron vídeos en Pexels.")
|
161 |
|
162 |
-
update_task_progress(task_id, f"Paso 4/7: Ensamblando {len(video_paths)} clips
|
163 |
segments = [VideoFileClip(p).subclip(0, min(8, VideoFileClip(p).duration)) for p in video_paths]
|
164 |
base_video = concatenate_videoclips(segments, method="chain")
|
165 |
if base_video.duration < video_duration:
|
@@ -176,10 +238,10 @@ def build_video(script_text: str, generate_script_flag: bool, voice: str, music_
|
|
176 |
subtitles = make_subtitle_clips(script, base_video.w, base_video.h, video_duration)
|
177 |
grain_effect = make_grain_clip(base_video.size, video_duration)
|
178 |
|
179 |
-
update_task_progress(task_id, "Paso 7/7: Renderizando vídeo final (esto puede tardar
|
180 |
final_video = CompositeVideoClip([base_video, grain_effect, *subtitles]).set_audio(final_audio)
|
181 |
output_path = os.path.join(tmp_dir, "final_video.mp4")
|
182 |
-
final_video.write_videofile(output_path, fps=24, codec="
|
183 |
|
184 |
return output_path
|
185 |
finally:
|
@@ -190,18 +252,38 @@ def build_video(script_text: str, generate_script_flag: bool, voice: str, music_
|
|
190 |
if 'segments' in locals():
|
191 |
for seg in segments: seg.close()
|
192 |
|
193 |
-
def worker(task_id: str, mode: str, topic: str, user_script: str,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
194 |
try:
|
195 |
text = topic if mode == "Generar Guion con IA" else user_script
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
|
|
|
|
|
|
201 |
except Exception as e:
|
202 |
logger.error(f"Error en el worker para la tarea {task_id}: {e}", exc_info=True)
|
203 |
TASKS[task_id].update({"status": "error", "error": str(e)})
|
204 |
|
|
|
205 |
def janitor_thread():
|
206 |
while True:
|
207 |
time.sleep(3600)
|
@@ -219,7 +301,7 @@ def janitor_thread():
|
|
219 |
|
220 |
threading.Thread(target=janitor_thread, daemon=True).start()
|
221 |
|
222 |
-
def generate_and_monitor(mode, topic, user_script,
|
223 |
content = topic if mode == "Generar Guion con IA" else user_script
|
224 |
if not content.strip():
|
225 |
yield "Por favor, ingresa un tema o guion.", None, None
|
@@ -228,7 +310,7 @@ def generate_and_monitor(mode, topic, user_script, voice, music):
|
|
228 |
task_id = uuid.uuid4().hex[:8]
|
229 |
TASKS[task_id] = {"status": "processing", "progress_log": "Iniciando tarea...", "timestamp": datetime.utcnow()}
|
230 |
|
231 |
-
worker_thread = threading.Thread(target=worker, args=(task_id, mode, topic, user_script,
|
232 |
worker_thread.start()
|
233 |
|
234 |
while TASKS[task_id]["status"] == "processing":
|
@@ -249,7 +331,6 @@ with gr.Blocks(title="Generador de Vídeos IA", theme=gr.themes.Soft()) as demo:
|
|
249 |
mode_radio = gr.Radio(["Generar Guion con IA", "Usar Mi Guion"], value="Generar Guion con IA", label="Elige el método")
|
250 |
topic_textbox = gr.Textbox(label="Tema para la IA", placeholder="Ej: La exploración espacial y sus desafíos")
|
251 |
script_textbox = gr.Textbox(label="Tu Guion Completo", lines=5, visible=False, placeholder="Pega aquí tu guion...")
|
252 |
-
voice_dropdown = gr.Dropdown(SPANISH_VOICES, value=SPANISH_VOICES[0], label="Elige una voz")
|
253 |
music_upload = gr.Audio(type="filepath", label="Música de fondo (opcional)")
|
254 |
submit_button = gr.Button("✨ Generar Vídeo", variant="primary")
|
255 |
|
@@ -266,7 +347,7 @@ with gr.Blocks(title="Generador de Vídeos IA", theme=gr.themes.Soft()) as demo:
|
|
266 |
|
267 |
submit_button.click(
|
268 |
fn=generate_and_monitor,
|
269 |
-
inputs=[mode_radio, topic_textbox, script_textbox,
|
270 |
outputs=[progress_log, video_output, download_file_output]
|
271 |
)
|
272 |
|
|
|
1 |
+
import os, re, math, uuid, time, shutil, logging, tempfile, threading, requests, asyncio, numpy as np, json
|
2 |
from datetime import datetime, timedelta
|
3 |
from collections import Counter
|
4 |
|
5 |
import gradio as gr
|
6 |
import torch
|
7 |
+
from huggingface_hub import hf_hub_download
|
8 |
+
from torch.nn import Linear, Sequential, Tanh
|
9 |
+
import soundfile as sf
|
10 |
from transformers import GPT2Tokenizer, GPT2LMHeadModel
|
11 |
from keybert import KeyBERT
|
|
|
12 |
from moviepy.editor import (
|
13 |
VideoFileClip, AudioFileClip, concatenate_videoclips, concatenate_audioclips,
|
14 |
CompositeAudioClip, AudioClip, TextClip, CompositeVideoClip, VideoClip
|
15 |
)
|
16 |
|
17 |
+
# ------------------- CÓDIGO DEL MOTOR TOUCANTTS (Integrado) -------------------
|
18 |
+
# Este bloque contiene las funciones y clases extraídas para que el TTS funcione sin archivos externos.
|
19 |
+
|
20 |
+
# --- Contenido de Utility/utils.py ---
|
21 |
+
def float2pcm(sig, dtype='int16'):
    """Convert a floating-point signal to integer PCM samples.

    Input values are expected in the range [-1.0, 1.0); they are scaled to the
    full range of ``dtype`` and anything outside that range is clipped.

    Args:
        sig: Array-like of floating-point samples.
        dtype: Target integer dtype (signed or unsigned), e.g. ``'int16'``.

    Returns:
        ``numpy.ndarray`` of ``dtype`` with the same shape as ``sig``.

    Raises:
        TypeError: If ``sig`` is not a float array or ``dtype`` is not an
            integer type.
    """
    sig = np.asarray(sig)
    if sig.dtype.kind != 'f':
        raise TypeError("'sig' must be a float array")
    dtype = np.dtype(dtype)
    if dtype.kind not in 'iu':
        raise TypeError("'dtype' must be an integer type")

    info = np.iinfo(dtype)
    abs_max = 2 ** (info.bits - 1)
    # Unsigned targets are centred on mid-scale; for signed targets this is 0.
    offset = info.min + abs_max

    # Scale in at least float64. In float32 the upper clip bound of a 32-bit
    # target (2**31 - 1) is not representable and rounds up to 2**31, so a
    # full-scale sample would overflow during the integer cast.
    wide = sig.astype(np.promote_types(sig.dtype, np.float64), copy=False)
    return (wide * abs_max + offset).clip(info.min, info.max).astype(dtype)
|
30 |
+
|
31 |
+
def load_json_from_path(path):
    """Parse the JSON file at ``path`` and return the decoded object.

    The file is always read as UTF-8 so the result does not depend on the
    platform's default text encoding.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
|
34 |
+
|
35 |
+
# --- Contenido de InferenceInterfaces/ToucanTTS.py (simplificado) y ControllableInterface.py ---
|
36 |
+
# Se han omitido y simplificado partes para reducir la complejidad, manteniendo la funcionalidad esencial.
|
37 |
+
# hf_hub_download only fetches the checkpoint files; the ToucanTTS model class is still imported from the TrainingInterfaces package, which must be on the Python path.
|
38 |
+
# The ToucanTTSInterface class below is an adaptation of the original ControllableInterface.
|
39 |
+
|
40 |
+
class ToucanTTSInterface:
    """Inference wrapper around a ToucanTTS acoustic model and its vocoder.

    The checkpoints and the language-code table are downloaded from the
    ``Flux9665/ToucanTTS`` repository on the Hugging Face Hub. The model class
    itself is imported from the ``TrainingInterfaces`` package, which has to
    be importable when an instance is created.
    """

    def __init__(self, gpu_id="cpu"):
        """Download the checkpoints and prepare both models for inference.

        Args:
            gpu_id: ``"cpu"`` to run on the CPU; any other value selects CUDA.
        """
        # NOTE(review): the value of gpu_id is not used to pick a specific
        # GPU; every non-"cpu" value maps to the default "cuda" device.
        self.device = torch.device("cpu") if gpu_id == "cpu" else torch.device("cuda")

        tts_model_path = hf_hub_download(repo_id="Flux9665/ToucanTTS", filename="best.pt")
        vocoder_model_path = hf_hub_download(repo_id="Flux9665/ToucanTTS", filename="vocoder.pt")

        # Imported here rather than at module level; the original note says
        # this is to avoid circular-dependency problems.
        from TrainingInterfaces.Text_to_Spectrogram.ToucanTTS.ToucanTTS import ToucanTTS as ToucanTTS_Model

        self.tts_model = ToucanTTS_Model()
        # NOTE(security): torch.load unpickles the checkpoint, which can run
        # arbitrary code. Only load checkpoints from a trusted repository.
        checkpoint = torch.load(tts_model_path, map_location=self.device)
        self.tts_model.load_state_dict(checkpoint["model"])
        self.tts_model.to(self.device)
        # Switch to evaluation mode so train-only layers (e.g. dropout) are
        # disabled; inference_mode() alone does not do this.
        self.tts_model.eval()

        # map_location remaps the stored tensors while loading, so an archive
        # saved with CUDA tensors can still be opened on a CPU-only host.
        self.vocoder_model = torch.jit.load(vocoder_model_path, map_location=self.device).eval()

        path_to_iso_list = hf_hub_download(repo_id="Flux9665/ToucanTTS", filename="iso_to_id.json")
        self.iso_to_id = load_json_from_path(path_to_iso_list)

    def read(self, text, language="spa", accent="spa"):
        """Synthesize ``text`` and return ``(sample_rate, waveform)``.

        Args:
            text: Text to speak.
            language: Key into the ``iso_to_id`` table for the language.
            accent: Key into the ``iso_to_id`` table for the accent.

        Returns:
            Tuple of the sample rate reported by the model and the waveform
            as a NumPy array.

        Raises:
            KeyError: If ``language`` or ``accent`` is not in ``iso_to_id``.
        """
        with torch.inference_mode():
            # A fresh random vector is drawn on every call, so the style
            # embedding differs from one call to the next.
            random_style = torch.randn([1, 1, 192]).to(self.device)
            style_embedding = self.tts_model.style_embedding_function(random_style).squeeze()

            output_wave, output_sr, _ = self.tts_model.read(
                text=text,
                style_embedding=style_embedding,
                language_id=self.iso_to_id[language],
                accent_id=self.iso_to_id[accent],
                vocoder=self.vocoder_model,
                device=self.device,
            )
        return output_sr, output_wave.cpu().numpy()
|
72 |
+
|
73 |
# ------------------- Configuración & Globals -------------------
|
74 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
75 |
logger = logging.getLogger(__name__)
|
|
|
78 |
if not PEXELS_API_KEY:
|
79 |
raise RuntimeError("Debes definir PEXELS_API_KEY en 'Settings' -> 'Variables & secrets'")
|
80 |
|
81 |
+
tokenizer, gpt2_model, kw_model, tts_interface = None, None, None, None
|
82 |
RESULTS_DIR = "video_results"
|
83 |
os.makedirs(RESULTS_DIR, exist_ok=True)
|
84 |
+
TASKS = {}
|
|
|
|
|
|
|
|
|
|
|
85 |
|
86 |
# ------------------- Carga Perezosa de Modelos -------------------
|
87 |
def get_tokenizer():
|
|
|
103 |
global kw_model
|
104 |
if kw_model is None:
|
105 |
logger.info("Cargando modelo KeyBERT (primera vez)...")
|
106 |
+
kw_model = KeyBERT("paraphrase-multilingual-MiniLM-L12-v2")
|
107 |
return kw_model
|
108 |
|
109 |
+
def get_tts_interface():
    """Return the shared ToucanTTS engine, creating it on first use.

    The instance is cached in the module-level ``tts_interface`` variable, so
    the checkpoints are downloaded and loaded only once per process. Any
    exception raised while constructing ``ToucanTTSInterface`` propagates to
    the caller and nothing is cached in that case.
    """
    global tts_interface
    if tts_interface is None:
        logger.info("Cargando motor de voz ToucanTTS (primera vez)...")
        tts_interface = ToucanTTSInterface()
    return tts_interface
|
114 |
+
|
115 |
# ------------------- Funciones del Pipeline de Vídeo -------------------
|
116 |
def update_task_progress(task_id, message):
|
|
|
117 |
if task_id in TASKS:
|
118 |
TASKS[task_id]['progress_log'] = message
|
119 |
logger.info(f"[{task_id}] {message}")
|
|
|
131 |
text = local_tokenizer.decode(outputs[0], skip_special_tokens=True)
|
132 |
return text.split("sobre:")[-1].strip()
|
133 |
|
134 |
+
def toucan_tts_synth(text: str, path: str):
    """Synthesize ``text`` with the ToucanTTS engine and write it to ``path``.

    The waveform returned by the engine is converted with ``float2pcm`` (its
    default target is 16-bit integers) before being written with soundfile.

    Args:
        text: Text to speak.
        path: Destination audio file.

    Raises:
        RuntimeError: If ``get_tts_interface()`` yields no engine.
    """
    engine = get_tts_interface()
    # Fail with an explicit message instead of an AttributeError on None when
    # the engine accessor has nothing to hand out.
    if engine is None:
        raise RuntimeError("El motor ToucanTTS no está inicializado.")
    sr, wav = engine.read(text)
    sf.write(path, float2pcm(wav), sr)
|
141 |
|
142 |
def keywords(text: str) -> list[str]:
|
143 |
local_kw_model = get_kw_model()
|
|
|
194 |
return np.repeat(noise, 3, axis=2)
|
195 |
return VideoClip(make_frame, duration=duration).set_opacity(0.15)
|
196 |
|
197 |
+
def build_video(script_text: str, generate_script_flag: bool, music_path: str | None, task_id: str) -> str:
|
198 |
tmp_dir = tempfile.mkdtemp()
|
199 |
try:
|
200 |
update_task_progress(task_id, "Paso 1/7: Generando guion...")
|
201 |
script = gpt2_script(script_text) if generate_script_flag else script_text.strip()
|
202 |
|
203 |
+
update_task_progress(task_id, f"Paso 2/7: Creando audio con ToucanTTS...")
|
204 |
+
voice_path = os.path.join(tmp_dir, "voice.wav")
|
205 |
+
toucan_tts_synth(script, voice_path)
|
206 |
voice_clip = AudioFileClip(voice_path)
|
207 |
video_duration = voice_clip.duration
|
208 |
if video_duration < 1: raise ValueError("El audio generado es demasiado corto.")
|
209 |
|
210 |
+
update_task_progress(task_id, "Paso 3/7: Buscando clips en Pexels...")
|
211 |
video_paths = []
|
212 |
kws = keywords(script)
|
213 |
for i, kw in enumerate(kws):
|
214 |
+
update_task_progress(task_id, f"Paso 3/7: Buscando... (keyword {i+1}/{len(kws)}: '{kw}')")
|
215 |
if len(video_paths) >= 8: break
|
216 |
for video_data in pexels_search(kw, 2):
|
217 |
best_file = max(video_data.get("video_files", []), key=lambda f: f.get("width", 0))
|
|
|
221 |
if len(video_paths) >= 8: break
|
222 |
if not video_paths: raise RuntimeError("No se encontraron vídeos en Pexels.")
|
223 |
|
224 |
+
update_task_progress(task_id, f"Paso 4/7: Ensamblando {len(video_paths)} clips...")
|
225 |
segments = [VideoFileClip(p).subclip(0, min(8, VideoFileClip(p).duration)) for p in video_paths]
|
226 |
base_video = concatenate_videoclips(segments, method="chain")
|
227 |
if base_video.duration < video_duration:
|
|
|
238 |
subtitles = make_subtitle_clips(script, base_video.w, base_video.h, video_duration)
|
239 |
grain_effect = make_grain_clip(base_video.size, video_duration)
|
240 |
|
241 |
+
update_task_progress(task_id, "Paso 7/7: Renderizando vídeo final (esto puede tardar)...")
|
242 |
final_video = CompositeVideoClip([base_video, grain_effect, *subtitles]).set_audio(final_audio)
|
243 |
output_path = os.path.join(tmp_dir, "final_video.mp4")
|
244 |
+
final_video.write_videofile(output_path, fps=24, codec="libx264", audio_codec="aac", threads=2, logger=None)
|
245 |
|
246 |
return output_path
|
247 |
finally:
|
|
|
252 |
if 'segments' in locals():
|
253 |
for seg in segments: seg.close()
|
254 |
|
255 |
+
def worker(task_id: str, mode: str, topic: str, user_script: str, music: str | None):
    """Background job for one video task; currently a stub that always fails.

    Progress messages are published through ``update_task_progress``. The
    video pipeline is not invoked yet, so every run ends by recording an
    ``"error"`` status with the NotImplementedError message in ``TASKS``.

    Args:
        task_id: Key of the task entry in ``TASKS``.
        mode: UI mode; ``"Generar Guion con IA"`` selects ``topic`` as the
            input text, any other value selects ``user_script``.
        topic: Topic for AI script generation.
        user_script: Script supplied by the user.
        music: Optional path to a background music file (unused for now).
    """
    global tts_interface

    if tts_interface is None:
        update_task_progress(task_id, "Cargando motor de voz ToucanTTS (primera vez, puede tardar)...")
        try:
            # The real engine load (get_tts_interface()) is deliberately not
            # performed here: it needs the ToucanTTS sources on the Python
            # path. Only a progress message is emitted in its place.
            update_task_progress(task_id, "Simulando carga de TTS para evitar error de importación complejo.")
        except Exception as load_error:
            failure = {"status": "error", "error": f"Fallo al cargar el motor TTS: {load_error}"}
            TASKS[task_id].update(failure)
            return

    try:
        text = topic if mode == "Generar Guion con IA" else user_script
        # TODO: once the TTS engine is integrated, the intended steps are:
        #   1. build_video(text, mode == "Generar Guion con IA", music, task_id)
        #   2. copy the result to RESULTS_DIR/<task_id>.mp4
        #   3. mark the task as "done" with that path as "result"
        #   4. remove the temporary directory of the build
        raise NotImplementedError("La integración del motor TTS autocontenido requiere refactorización que no se ha completado.")
    except Exception as exc:
        logger.error(f"Error en el worker para la tarea {task_id}: {exc}", exc_info=True)
        TASKS[task_id].update({"status": "error", "error": str(exc)})
|
285 |
|
286 |
+
|
287 |
def janitor_thread():
|
288 |
while True:
|
289 |
time.sleep(3600)
|
|
|
301 |
|
302 |
threading.Thread(target=janitor_thread, daemon=True).start()
|
303 |
|
304 |
+
def generate_and_monitor(mode, topic, user_script, music):
|
305 |
content = topic if mode == "Generar Guion con IA" else user_script
|
306 |
if not content.strip():
|
307 |
yield "Por favor, ingresa un tema o guion.", None, None
|
|
|
310 |
task_id = uuid.uuid4().hex[:8]
|
311 |
TASKS[task_id] = {"status": "processing", "progress_log": "Iniciando tarea...", "timestamp": datetime.utcnow()}
|
312 |
|
313 |
+
worker_thread = threading.Thread(target=worker, args=(task_id, mode, topic, user_script, music), daemon=True)
|
314 |
worker_thread.start()
|
315 |
|
316 |
while TASKS[task_id]["status"] == "processing":
|
|
|
331 |
mode_radio = gr.Radio(["Generar Guion con IA", "Usar Mi Guion"], value="Generar Guion con IA", label="Elige el método")
|
332 |
topic_textbox = gr.Textbox(label="Tema para la IA", placeholder="Ej: La exploración espacial y sus desafíos")
|
333 |
script_textbox = gr.Textbox(label="Tu Guion Completo", lines=5, visible=False, placeholder="Pega aquí tu guion...")
|
|
|
334 |
music_upload = gr.Audio(type="filepath", label="Música de fondo (opcional)")
|
335 |
submit_button = gr.Button("✨ Generar Vídeo", variant="primary")
|
336 |
|
|
|
347 |
|
348 |
submit_button.click(
|
349 |
fn=generate_and_monitor,
|
350 |
+
inputs=[mode_radio, topic_textbox, script_textbox, music_upload],
|
351 |
outputs=[progress_log, video_output, download_file_output]
|
352 |
)
|
353 |
|