Spaces:

somosnlp-hackathon-2025
/

Iberotales-thinking-demo

Running

App Files Files Community

daqc committed 24 days ago

Commit

ba0efb9

verified ·

1 Parent(s): 655b309

Upload 2 files

Browse files

Files changed (2) hide show

app.py +480 -0
requirements.txt +13 -0

app.py ADDED Viewed

	@@ -0,0 +1,480 @@

+import os
+import gradio as gr
+from gradio import ChatMessage
+import torch
+import torch._dynamo
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from threading import Thread
+from huggingface_hub import hf_hub_download, login
+from dotenv import load_dotenv
+import re
+from llama_cpp import Llama
+from typing import Iterator
+# Load environment variables (python-dotenv)
+load_dotenv()
+# Configure the Hugging Face token: log in to the Hub only when HF_TOKEN is set
+HF_TOKEN = os.getenv("HF_TOKEN")
+if HF_TOKEN:
+    login(token=HF_TOKEN)
+# Try to import `spaces`; SPACES_AVAILABLE records whether the import worked so
+# the GPU wrapper further down is only applied when the package is present
+try:
+    import spaces
+    SPACES_AVAILABLE = True
+except ImportError:
+    SPACES_AVAILABLE = False
+# Turn off TorchDynamo to avoid compilation errors
+# NOTE(review): `torch._dynamo.disable()` is called without a function and its
+# return value is discarded — confirm that this really disables Dynamo globally
+torch._dynamo.config.suppress_errors = True
+torch._dynamo.disable()
+# Configuración
+MODEL_ID = "somosnlp-hackathon-2025/iberotales-gemma-3-1b-it-es"
+GGUF_MODEL_ID = "somosnlp-hackathon-2025/iberotales-gemma-3-1b-it-es-finetune-gguf"
+GGUF_FILENAME = "gemma-3-finetune.Q8_0.gguf"
+GGUF_REVISION = "main"
+MAX_MAX_NEW_TOKENS = 2048
+DEFAULT_MAX_NEW_TOKENS = 2048
+# Verificar si estamos en un espacio de Hugging Face
+IS_HF_SPACE = any([
+    os.getenv("SPACE_ID") is not None,
+    os.getenv("SPACE_AUTHOR_NAME") is not None,
+    os.getenv("SPACE_REPO_NAME") is not None,
+    os.getenv("SPACE_HOST") is not None,
+])
+# System prompt personalizado
+DEFAULT_SYSTEM_MESSAGE = """Resuelve el siguiente problema.
+Primero, piensa en voz alta qué debes hacer, paso por paso y de forma resumida, entre <think> y </think>.
+Luego, da la respuesta final entre <SOLUTION> y </SOLUTION>.
+No escribas nada fuera de ese formato."""
+# Base de datos de personajes por país con banderas
+PERSONAJES_POR_PAIS = {
+    "🇦🇷 Argentina": [
+        {"nombre": "La Difunta Correa", "imagen": "images/ar1.jpg", "descripcion": "Santa popular que murió de sed siguiendo a su esposo reclutado"},
+        {"nombre": "El Lobizón", "imagen": "images/ar2.jpg", "descripcion": "Hombre lobo de la tradición gaucha, séptimo hijo varón maldito"},
+        {"nombre": "La Telesita", "imagen": "images/ar3.webp", "descripcion": "Bailarina folklórica que se aparece en festivales y zambas"}
+    ],
+    "🇧🇴 Bolivia": [
+        {"nombre": "El Tío del Cerro Rico", "imagen": "images/bo1.webp", "descripcion": "Señor de las minas que protege y castiga a los mineros"},
+        {"nombre": "El Ekeko", "imagen": "images/bo2.jpg", "descripcion": "Dios aymara de la abundancia y la fortuna con jorobas"},
+        {"nombre": "El Jichi", "imagen": "images/bo3.webp", "descripcion": "Serpiente protectora de ríos y lagunas en la cultura andina"}
+    ]
+};
+# Variables globales
+model = None
+tokenizer = None
+current_personajes = []  # Para mantener el estado de los personajes actuales
+def load_model():
+    """Cargar modelo y tokenizador"""
+    global model, tokenizer
+    if torch.cuda.is_available():
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+            model = AutoModelForCausalLM.from_pretrained(
+                MODEL_ID,
+                torch_dtype=torch.float32,
+                device_map="auto",
+                trust_remote_code=True,
+            )
+            if tokenizer.pad_token is None:
+                tokenizer.pad_token = tokenizer.eos_token
+            return True
+        except Exception as e:
+            print(f"Error GPU: {e}")
+            return False
+    else:
+        try:
+            local_model_path = os.path.join("models", GGUF_FILENAME)
+            if os.path.exists(local_model_path):
+                model_path = local_model_path
+            else:
+                model_path = hf_hub_download(
+                    repo_id=GGUF_MODEL_ID,
+                    filename=GGUF_FILENAME,
+                    revision=GGUF_REVISION,
+                    local_dir="./models",
+                    force_download=False,
+                    resume_download=True
+                )
+            tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-1b-it")
+            model = Llama(
+                model_path=model_path,
+                n_ctx=2048,
+                n_threads=4,
+                n_gpu_layers=0
+            )
+            return True
+        except Exception as e:
+            print(f"Error GGUF: {e}")
+            return False
+model_loaded = load_model()
+def format_chat_history(messages: list, exclude_last_user: bool = True) -> list:
+    """Formatea el historial de chat para el modelo"""
+    formatted_history = []
+    messages_to_process = messages[:]
+    if exclude_last_user and messages_to_process and messages_to_process[-1].get("role") == "user":
+        messages_to_process = messages_to_process[:-1]
+    for message in messages_to_process:
+        current_role = message.get("role")
+        current_content = message.get("content", "").strip()
+        if current_role == "assistant" and message.get("metadata"):
+            continue
+        if not current_content:
+            continue
+        if formatted_history and formatted_history[-1]["role"] == current_role:
+            formatted_history[-1]["content"] += "\n\n" + current_content
+        else:
+            formatted_history.append({
+                "role": current_role,
+                "content": current_content
+            })
+    return formatted_history
+def stream_iberotales_response(
+    user_message: str,
+    messages: list,
+    system_message: str = DEFAULT_SYSTEM_MESSAGE,
+    max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
+    temperature: float = 0.7,
+    top_p: float = 0.95,
+    top_k: int = 50,
+    repetition_penalty: float = 1.2,
+) -> Iterator[list]:
+    """Genera respuesta con streaming"""
+    global model, tokenizer
+    if model is None or tokenizer is None:
+        messages.append(ChatMessage(role="assistant", content="Error: Modelo no disponible."))
+        yield messages
+        return
+    try:
+        chat_history = format_chat_history(messages, exclude_last_user=True)
+        conversation = []
+        if system_message.strip():
+            conversation.append({"role": "system", "content": system_message.strip()})
+        conversation.extend(chat_history)
+        conversation.append({"role": "user", "content": user_message})
+        # Validar alternancia
+        for i in range(1, len(conversation)):
+            if conversation[i]["role"] == conversation[i-1]["role"] and conversation[i-1]["role"] != "system":
+                messages.append(ChatMessage(role="assistant", content="Error: Reinicia la conversación."))
+                yield messages
+                return
+        prompt = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
+        response = model(
+            prompt,
+            max_tokens=max_new_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            top_k=top_k,
+            repeat_penalty=repetition_penalty,
+            stream=True
+        )
+        full_response = ""
+        thinking_message_index = None
+        solution_message_index = None
+        in_think_block = False
+        in_solution_block = False
+        thinking_complete = False
+        for chunk in response:
+            if chunk["choices"][0]["finish_reason"] is None:
+                new_text = chunk["choices"][0]["text"]
+                full_response += new_text
+                # Procesar pensamiento
+                if "<think>" in full_response and not thinking_complete:
+                    if not in_think_block:
+                        in_think_block = True
+                        if thinking_message_index is None:
+                            messages.append(ChatMessage(
+                                role="assistant",
+                                content="",
+                                metadata={"title": "🤔 Pensando..."}
+                            ))
+                            thinking_message_index = len(messages) - 1
+                    think_start = full_response.find("<think>") + 7
+                    if "</think>" in full_response:
+                        think_end = full_response.find("</think>")
+                        current_thinking = full_response[think_start:think_end].strip()
+                        thinking_complete = True
+                        in_think_block = False
+                    else:
+                        current_thinking = full_response[think_start:].strip()
+                    if thinking_message_index is not None:
+                        messages[thinking_message_index] = ChatMessage(
+                            role="assistant",
+                            content=current_thinking,
+                            metadata={"title": "🤔 Pensando..."}
+                        )
+                        yield messages
+                # Procesar solución
+                if "<SOLUTION>" in full_response:
+                    if not in_solution_block:
+                        in_solution_block = True
+                        if solution_message_index is None:
+                            messages.append(ChatMessage(role="assistant", content=""))
+                            solution_message_index = len(messages) - 1
+                    solution_start = full_response.find("<SOLUTION>") + 10
+                    if "</SOLUTION>" in full_response:
+                        solution_end = full_response.find("</SOLUTION>")
+                        current_solution = full_response[solution_start:solution_end].strip()
+                        in_solution_block = False
+                    else:
+                        current_solution = full_response[solution_start:].strip()
+                    if solution_message_index is not None and current_solution:
+                        messages[solution_message_index] = ChatMessage(
+                            role="assistant",
+                            content=current_solution
+                        )
+                        yield messages
+        # Respuesta sin formato
+        if full_response.strip() and solution_message_index is None:
+            clean_response = full_response
+            if "<think>" in clean_response and "</think>" in clean_response:
+                clean_response = re.sub(r'<think>.*?</think>', '', clean_response, flags=re.DOTALL)
+            if "<SOLUTION>" in clean_response and "</SOLUTION>" in clean_response:
+                clean_response = re.sub(r'<SOLUTION>(.*?)</SOLUTION>', r'\1', clean_response, flags=re.DOTALL)
+            clean_response = clean_response.strip()
+            if clean_response:
+                messages.append(ChatMessage(role="assistant", content=clean_response))
+                yield messages
+    except Exception as e:
+        messages.append(ChatMessage(role="assistant", content=f"Error: {str(e)}"))
+        yield messages
+def user_message(msg: str, history: list) -> tuple[str, list]:
+    """Añade mensaje del usuario al historial"""
+    history.append(ChatMessage(role="user", content=msg))
+    return "", history
+def actualizar_personajes(pais_seleccionado):
+    """Actualiza la galería de personajes según el país seleccionado"""
+    global current_personajes
+    personajes = PERSONAJES_POR_PAIS.get(pais_seleccionado, [])
+    current_personajes = personajes  # Guardamos el estado actual
+    if not personajes:
+        return [], "Selecciona un país para ver sus personajes"
+    # Crear lista de imágenes y etiquetas para la galería
+    imagenes = []
+    for p in personajes:
+        if os.path.exists(p["imagen"]):
+            imagenes.append((p["imagen"], f"{p['nombre']}: {p['descripcion']}"))
+        else:
+            # Imagen placeholder si no existe
+            imagenes.append(("data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMTAwIiBoZWlnaHQ9IjEwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cmVjdCB3aWR0aD0iMTAwIiBoZWlnaHQ9IjEwMCIgZmlsbD0iI2NjYyIvPjx0ZXh0IHg9IjUwIiB5PSI1MCIgZm9udC1mYW1pbHk9IkFyaWFsIiBmb250LXNpemU9IjEyIiBmaWxsPSIjNjY2IiB0ZXh0LWFuY2hvcj0ibWlkZGxlIiBkeT0iLjNlbSI+SW1hZ2VuPC90ZXh0Pjwvc3ZnPg==", f"{p['nombre']}: {p['descripcion']}"))
+    return imagenes, f"Personajes de {pais_seleccionado}"
+def crear_prompt_desde_personaje(evt: gr.SelectData):
+    """Crea un prompt basado en el personaje seleccionado"""
+    global current_personajes
+    try:
+        if evt.index is not None and evt.index < len(current_personajes):
+            personaje = current_personajes[evt.index]
+            return f"Crea una historia sobre {personaje['nombre']}, {personaje['descripcion']}"  #si alguien lee esto, cambiar el dataste a cuenta en lugar de crea
+        else:
+            return "Crea una historia sobre un personaje mítico"
+    except Exception as e:
+        print(f"Error al crear prompt: {e}")
+        return "Crea una historia sobre un personaje mítico"
+# Apply the spaces.GPU decorator when needed: only inside a Hugging Face Space,
+# with the `spaces` package importable and CUDA available
+if IS_HF_SPACE and SPACES_AVAILABLE and torch.cuda.is_available():
+    stream_iberotales_response = spaces.GPU(stream_iberotales_response)
+# Custom CSS to improve the appearance: page width, the single-column gallery
+# (element id "galeria") and the header banner (class "header-info")
+custom_css = """
+.gradio-container {
+    max-width: 1400px !important;
+    margin: auto;
+    padding-top: 1.5rem;
+}
+#galeria .grid-wrap {
+    max-height: 350px;
+    overflow-y: auto;
+}
+#galeria .grid-container {
+    grid-template-columns: repeat(1, 1fr) !important;
+    gap: 0.5rem;
+}
+#galeria .thumbnail-item {
+    aspect-ratio: 1;
+    max-height: 100px;
+}
+#galeria .thumbnail-item img {
+    object-fit: cover;
+    width: 100%;
+    height: 100%;
+    border-radius: 8px;
+}
+.header-info {
+    background: linear-gradient(135deg, #2c3e50 0%, #1a1a2e 100%);
+    color: white;
+    padding: 1rem;
+    border-radius: 12px;
+    margin-bottom: 1rem;
+    text-align: center;
+}
+"""
+# Crear la interfaz
+with gr.Blocks(fill_height=True, title="Iberotales", css=custom_css) as demo:
+    # Header con información del proyecto
+    with gr.Row():
+        with gr.Column():
+            gr.HTML("""
+                    <div class="header-info">
+                        <h1>📚 Iberotales</h1>
+                        <p><strong>Autor:</strong> David Quispe &nbsp;|&nbsp; <a href="https://github.com/mcdaqc/Iberotales" target="_blank" style="text-decoration: none;">GitHub</a> &nbsp;|&nbsp; <a href="https://huggingface.co/somosnlp-hackathon-2025/iberotales-gemma-3-1b-it-es" target="_blank" style="text-decoration: none;">Modelo</a> &nbsp;|&nbsp; <a href="https://huggingface.co/somosnlp-hackathon-2025/iberotales-gemma-3-1b-it-es-finetune-gguf" target="_blank" style="text-decoration: none;">GGUF</a></p>
+                        <p><em>Alineando modelos de lenguaje con la narrativa de mitos y leyendas de Iberoamérica.</em></p>
+                        <p><em>Hackathon SomosNLP 2025</em></p>
+                    </div>
+                 """)
+    with gr.Row():
+        # Panel izquierdo - Pokédex de personajes
+        with gr.Column(scale=1, min_width=320):
+            gr.Markdown("### 🗃️ Pokédex de Personajes")
+            pais_dropdown = gr.Dropdown(
+                choices=list(PERSONAJES_POR_PAIS.keys()),
+                value="🇦🇷 Argentina",
+                label="País",
+                container=False
+            )
+            galeria_personajes = gr.Gallery(
+                value=[],
+                label="Personajes",
+                show_label=False,
+                elem_id="galeria",
+                columns=1,
+                rows=4,
+                height=350,
+                object_fit="cover",
+                preview=False  # Esto evita que se expanda automáticamente
+            )
+        # Panel derecho - Chat
+        with gr.Column(scale=2):
+            chatbot = gr.Chatbot(
+                type="messages",
+                show_label=False,
+                height=400,
+                avatar_images=(None, "🏛️")
+            )
+            with gr.Row():
+                input_box = gr.Textbox(
+                    placeholder="Escribe tu historia o selecciona un personaje...",
+                    show_label=False,
+                    scale=4,
+                    container=False
+                )
+                send_button = gr.Button("📤", scale=1, variant="primary")
+            with gr.Row():
+                clear_button = gr.Button("🗑️ Limpiar", scale=1, size="sm")
+                with gr.Column(scale=3):
+                    with gr.Row():
+                        max_tokens = gr.Slider(100, MAX_MAX_NEW_TOKENS, DEFAULT_MAX_NEW_TOKENS, label="Tokens", container=False)
+                        temperature = gr.Slider(0.1, 2.0, 0.7, label="Temp", container=False)
+    # Variables de estado
+    msg_store = gr.State("")
+    # Eventos
+    def submit_message(msg, history):
+        if not msg.strip():
+            return msg, history
+        return "", user_message(msg, history)[1]
+    def generate_response(msg, history, max_tok, temp):
+        yield from stream_iberotales_response(msg, history, DEFAULT_SYSTEM_MESSAGE, max_tok, temp)
+    # Actualizar personajes cuando cambia el país
+    pais_dropdown.change(
+        fn=actualizar_personajes,
+        inputs=[pais_dropdown],
+        outputs=[galeria_personajes, gr.Textbox(visible=False)]
+    )
+    # Cargar personajes iniciales
+    demo.load(
+        fn=actualizar_personajes,
+        inputs=[pais_dropdown],
+        outputs=[galeria_personajes, gr.Textbox(visible=False)]
+    )
+    # Crear prompt desde galería
+    galeria_personajes.select(
+        fn=crear_prompt_desde_personaje,
+        outputs=[input_box]
+    )
+    # Envío de mensajes
+    input_box.submit(
+        lambda msg, hist: (msg, submit_message(msg, hist)[1]),
+        inputs=[input_box, chatbot],
+        outputs=[msg_store, chatbot],
+        queue=False
+    ).then(
+        generate_response,
+        inputs=[msg_store, chatbot, max_tokens, temperature],
+        outputs=chatbot
+    )
+    send_button.click(
+        lambda msg, hist: (msg, submit_message(msg, hist)[1]),
+        inputs=[input_box, chatbot],
+        outputs=[msg_store, chatbot],
+        queue=False
+    ).then(
+        generate_response,
+        inputs=[msg_store, chatbot, max_tokens, temperature],
+        outputs=chatbot
+    )
+    clear_button.click(
+        lambda: ([], "", ""),
+        outputs=[chatbot, input_box, msg_store],
+        queue=False
+    )
+# Lanzar aplicación
+if __name__ == "__main__":
+    if model_loaded:
+        demo.launch(share=False, show_error=True)
+    else:
+        print("Error al cargar el modelo.")

requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+gradio>=4.0.0
+torch>=2.0.0
+transformers>=4.36.0
+huggingface_hub>=0.20.0
+llama-cpp-python>=0.2.0
+python-dotenv
+accelerate
+protobuf
+sentencepiece
+gguf
+hf_xet
+pillow