Update app.py
app.py CHANGED
@@ -1,64 +1,209 @@
-import gradio as gr
-from huggingface_hub import InferenceClient
-
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-
-        response += token
-        yield response
-
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-
-
-if __name__ == "__main__":
-    demo.launch()
+import os
+import gradio as gr
+import torch
+import torch._dynamo
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+from threading import Thread
+import spaces
+
+# Disable TorchDynamo to avoid compilation errors
+torch._dynamo.config.suppress_errors = True
+torch._dynamo.disable()
+
+# Configuration
+MODEL_ID = "somosnlp-hackathon-2025/iberotales-gemma-3-1b-it-es"
+MAX_MAX_NEW_TOKENS = 4096
+DEFAULT_MAX_NEW_TOKENS = 2048
+MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "2048"))
+
+# Custom system prompt (kept in Spanish; it is passed verbatim to the model)
+DEFAULT_SYSTEM_MESSAGE = """Resuelve el siguiente problema.
+Primero, piensa en voz alta qué debes hacer, paso por paso y de forma resumida, entre <think> y </think>.
+Luego, da la respuesta final entre <SOLUTION> y </SOLUTION>.
+No escribas nada fuera de ese formato."""
+
+# Global variables
+model = None
+tokenizer = None
+
+def load_model():
+    """Load the model and tokenizer."""
+    global model, tokenizer
+
+    if torch.cuda.is_available():
+        print(f"Loading model: {MODEL_ID}")
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+            model = AutoModelForCausalLM.from_pretrained(
+                MODEL_ID,
+                torch_dtype=torch.float32,
+                device_map="auto",
+                trust_remote_code=True,
+            )
+
+            if tokenizer.pad_token is None:
+                tokenizer.pad_token = tokenizer.eos_token
+
+            print("Model loaded successfully!")
+            return True
+        except Exception as e:
+            print(f"Error loading the model: {e}")
+            return False
+    else:
+        print("CUDA not available")
+        return False
+
+# Load the model on startup
+model_loaded = load_model()
+
+@spaces.GPU
+def generate(
+    message: str,
+    history: list,
+    system_message: str,
+    max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
+    temperature: float = 0.7,
+    top_p: float = 0.95,
+    top_k: int = 50,
+    repetition_penalty: float = 1.2,
+):
+    """Generate a story with streaming output."""
+    global model, tokenizer
+
+    if model is None or tokenizer is None:
+        yield "Error: model not available. Please restart the application."
+        return
+
+    conversation = []
+
+    if system_message:
+        conversation.append({"role": "system", "content": system_message})
+
+    for msg in history:
+        if isinstance(msg, dict) and "role" in msg and "content" in msg:
+            conversation.append(msg)
+
+    conversation.append({"role": "user", "content": message})
+
+    try:
+        input_ids = tokenizer.apply_chat_template(
+            conversation,
+            return_tensors="pt",
+            add_generation_prompt=True,
+        )
+
+        if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
+            input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
+            gr.Warning(f"Conversation trimmed to {MAX_INPUT_TOKEN_LENGTH} tokens.")
+
+        input_ids = input_ids.to(model.device)
+        attention_mask = torch.ones_like(input_ids, device=model.device)
+
+        streamer = TextIteratorStreamer(
+            tokenizer,
+            timeout=30.0,
+            skip_prompt=True,
+            skip_special_tokens=True
+        )
+
+        generate_kwargs = {
+            "input_ids": input_ids,
+            "attention_mask": attention_mask,
+            "streamer": streamer,
+            "max_new_tokens": max_new_tokens,
+            "do_sample": True,
+            "top_p": top_p,
+            "top_k": top_k,
+            "temperature": temperature,
+            "repetition_penalty": repetition_penalty,
+            "pad_token_id": tokenizer.eos_token_id,
+            "eos_token_id": tokenizer.eos_token_id,
+        }
+
+        generation_thread = Thread(target=model.generate, kwargs=generate_kwargs)
+        generation_thread.start()
+
+        outputs = []
+        try:
+            for new_text in streamer:
+                outputs.append(new_text)
+                yield "".join(outputs)
+        except Exception as e:
+            yield f"Error during generation: {str(e)}"
+        finally:
+            generation_thread.join(timeout=1)
+
+    except Exception as e:
+        yield f"Error: {str(e)}"
+
+# Create the chat interface
+demo = gr.ChatInterface(
+    fn=generate,
+    title="Iberotales: Ibero-American Myths and Legends",
+    description="Generates stories and characters based on the cultural heritage of Ibero-America, using GRPO.",
+    chatbot=gr.Chatbot(
+        height=600,
+        show_copy_button=True,
+    ),
+    textbox=gr.Textbox(
+        placeholder="Describe a story or character you would like to generate...",
+        scale=7
+    ),
+    additional_inputs=[
+        gr.Textbox(
+            value=DEFAULT_SYSTEM_MESSAGE,
+            label="System message (structured format required)"
+        ),
+        gr.Slider(
+            label="Maximum tokens",
+            minimum=100,
+            maximum=MAX_MAX_NEW_TOKENS,
+            step=50,
+            value=DEFAULT_MAX_NEW_TOKENS,
+        ),
+        gr.Slider(
+            label="Temperature",
+            minimum=0.1,
+            maximum=2.0,
+            step=0.1,
+            value=0.7,
+        ),
+        gr.Slider(
+            label="Top-p",
+            minimum=0.1,
+            maximum=1.0,
+            step=0.05,
+            value=0.95,
+        ),
+        gr.Slider(
+            label="Top-k",
+            minimum=1,
+            maximum=100,
+            step=1,
+            value=50,
+        ),
+        gr.Slider(
+            label="Repetition penalty",
+            minimum=1.0,
+            maximum=2.0,
+            step=0.05,
+            value=1.2,
+        ),
+    ],
+    examples=[
+        ["Crea una historia corta sobre el Pombero, un personaje de la mitología guaraní."],
+        ["Genera un personaje basado en la leyenda del Cadejo."],
+        ["Inventa una narrativa en torno al Nahual en un entorno contemporáneo."],
+    ],
+    cache_examples=False,
+)
+
+if __name__ == "__main__":
+    if model_loaded:
+        print("Launching Gradio application...")
+        demo.launch(
+            share=False,
+            show_error=True
+        )
+    else:
+        print("Error loading the model. The application cannot start.")
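
For reference, the core of this rewrite is the standard transformers streaming pattern: model.generate() runs on a background thread while TextIteratorStreamer yields decoded text incrementally, which is what lets generate() in app.py yield partial responses to gr.ChatInterface. Below is a minimal standalone sketch of that pattern, reusing the model ID and an example prompt from this commit; it assumes the weights are accessible and enough memory is available.

# Minimal sketch of the streaming pattern used by the new app.py.
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MODEL_ID = "somosnlp-hackathon-2025/iberotales-gemma-3-1b-it-es"  # from this commit

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto")

# Build the prompt with the model's chat template, as generate() does in app.py.
input_ids = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Crea una historia corta sobre el Pombero."}],
    return_tensors="pt",
    add_generation_prompt=True,
).to(model.device)

streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# generate() blocks, so it runs on a worker thread; the streamer is consumed here.
thread = Thread(
    target=model.generate,
    kwargs={"input_ids": input_ids, "streamer": streamer, "max_new_tokens": 256},
)
thread.start()

for chunk in streamer:  # blocks until the next decoded piece arrives
    print(chunk, end="", flush=True)
thread.join()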
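
The new system prompt constrains the model to answer as <think>...</think> followed by <SOLUTION>...</SOLUTION>. A small helper along these lines (hypothetical, not part of this commit) could split a finished response into its two parts:

import re

def split_response(text: str) -> tuple[str, str]:
    """Split a response into (reasoning, solution); hypothetical helper.

    Returns empty strings for any tag pair that is missing.
    """
    think = re.search(r"<think>(.*?)</think>", text, re.DOTALL)
    solution = re.search(r"<SOLUTION>(.*?)</SOLUTION>", text, re.DOTALL)
    return (
        think.group(1).strip() if think else "",
        solution.group(1).strip() if solution else "",
    )

reasoning, story = split_response(
    "<think>Esbozar la leyenda.</think> <SOLUTION>Érase una vez...</SOLUTION>"
)
print(story)  # Érase una vez...

Note that skip_special_tokens=True in the app only strips tokenizer special tokens; the <think> and <SOLUTION> markers are plain text, so they reach the client and can be post-processed this way.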