Spaces:

somosnlp-hackathon-2025
/

Iberotales-thinking-demo

Sleeping

App Files Files Community

Iberotales-thinking-demo / app.py

daqc

Upload 2 files

ba0efb9 verified 3 months ago

raw

history blame

18.7 kB

	import os
	import gradio as gr
	from gradio import ChatMessage
	import torch
	import torch._dynamo
	from transformers import AutoModelForCausalLM, AutoTokenizer
	from threading import Thread
	from huggingface_hub import hf_hub_download, login
	from dotenv import load_dotenv
	import re
	from llama_cpp import Llama
	from typing import Iterator

	# Load environment variables through python-dotenv
	load_dotenv()

	# Log in to the Hugging Face Hub only when a token is configured
	HF_TOKEN = os.getenv("HF_TOKEN")
	if HF_TOKEN:
	    login(token=HF_TOKEN)

	# `spaces` is an optional dependency: remember whether the import worked
	try:
	    import spaces
	except ImportError:
	    SPACES_AVAILABLE = False
	else:
	    SPACES_AVAILABLE = True

	# Disable TorchDynamo to avoid compilation errors
	torch._dynamo.config.suppress_errors = True
	torch._dynamo.disable()

	# Model repositories and generation limits
	MODEL_ID = "somosnlp-hackathon-2025/iberotales-gemma-3-1b-it-es"
	GGUF_MODEL_ID = "somosnlp-hackathon-2025/iberotales-gemma-3-1b-it-es-finetune-gguf"
	GGUF_FILENAME = "gemma-3-finetune.Q8_0.gguf"
	GGUF_REVISION = "main"
	MAX_MAX_NEW_TOKENS = 2048
	DEFAULT_MAX_NEW_TOKENS = 2048

	# Treat the process as running inside a Hugging Face Space when any of the
	# SPACE_* environment variables is defined
	IS_HF_SPACE = any(
	    os.getenv(var_name) is not None
	    for var_name in ("SPACE_ID", "SPACE_AUTHOR_NAME", "SPACE_REPO_NAME", "SPACE_HOST")
	)

	# Custom system prompt: asks for reasoning inside <think> tags and the final
	# answer inside <SOLUTION> tags
	DEFAULT_SYSTEM_MESSAGE = """Resuelve el siguiente problema.
	Primero, piensa en voz alta qué debes hacer, paso por paso y de forma resumida, entre <think> y </think>.
	Luego, da la respuesta final entre <SOLUTION> y </SOLUTION>.
	No escribas nada fuera de ese formato."""

	# Character database keyed by country; the keys include the flag emoji and
	# are used verbatim as dropdown choices. Each entry holds the character's
	# name, the path of its image, and a one-line description.
	PERSONAJES_POR_PAIS = {
	    "🇦🇷 Argentina": [
	        {"nombre": "La Difunta Correa", "imagen": "images/ar1.jpg", "descripcion": "Santa popular que murió de sed siguiendo a su esposo reclutado"},
	        {"nombre": "El Lobizón", "imagen": "images/ar2.jpg", "descripcion": "Hombre lobo de la tradición gaucha, séptimo hijo varón maldito"},
	        {"nombre": "La Telesita", "imagen": "images/ar3.webp", "descripcion": "Bailarina folklórica que se aparece en festivales y zambas"},
	    ],
	    "🇧🇴 Bolivia": [
	        {"nombre": "El Tío del Cerro Rico", "imagen": "images/bo1.webp", "descripcion": "Señor de las minas que protege y castiga a los mineros"},
	        {"nombre": "El Ekeko", "imagen": "images/bo2.jpg", "descripcion": "Dios aymara de la abundancia y la fortuna con jorobas"},
	        {"nombre": "El Jichi", "imagen": "images/bo3.webp", "descripcion": "Serpiente protectora de ríos y lagunas en la cultura andina"},
	    ],
	}

	# Module-level state
	model = None
	tokenizer = None
	current_personajes = []  # Characters of the country currently shown in the gallery

	def load_model():
	    """Load the model and tokenizer into the module-level globals.

	    When CUDA is available the ``MODEL_ID`` checkpoint is loaded with
	    Transformers. Otherwise the GGUF file is loaded through llama-cpp on CPU,
	    using ``models/<GGUF_FILENAME>`` if it already exists and downloading it
	    from ``GGUF_MODEL_ID`` if it does not.

	    Returns:
	        True when loading finished, False when any step raised; the error is
	        printed and not re-raised.
	    """
	    global model, tokenizer

	    if torch.cuda.is_available():
	        # NOTE(review): this branch stores a Transformers model, while
	        # stream_iberotales_response calls `model(prompt, max_tokens=...,
	        # stream=True)` and reads `chunk["choices"]`, i.e. the llama-cpp
	        # calling convention -- confirm the GPU path actually works.
	        try:
	            tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
	            model = AutoModelForCausalLM.from_pretrained(
	                MODEL_ID,
	                torch_dtype=torch.float32,
	                device_map="auto",
	                trust_remote_code=True,
	            )
	            if tokenizer.pad_token is None:
	                tokenizer.pad_token = tokenizer.eos_token
	            return True
	        except Exception as e:
	            print(f"Error GPU: {e}")
	            return False

	    try:
	        local_model_path = os.path.join("models", GGUF_FILENAME)
	        if os.path.exists(local_model_path):
	            model_path = local_model_path
	        else:
	            # `resume_download` is deprecated in huggingface_hub, so it is
	            # no longer passed here.
	            model_path = hf_hub_download(
	                repo_id=GGUF_MODEL_ID,
	                filename=GGUF_FILENAME,
	                revision=GGUF_REVISION,
	                local_dir="./models",
	                force_download=False,
	            )
	        # Only the chat template of this tokenizer is used on the GGUF path
	        tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-1b-it")
	        model = Llama(
	            model_path=model_path,
	            n_ctx=2048,
	            n_threads=4,
	            n_gpu_layers=0,
	        )
	        return True
	    except Exception as e:
	        print(f"Error GGUF: {e}")
	        return False

	# Load once at import time; the result gates demo.launch() at the bottom
	model_loaded = load_model()

	def format_chat_history(messages: list, exclude_last_user: bool = True) -> list:
	    """Convert the chatbot history into plain role/content dicts for the model.

	    Args:
	        messages: Chat history. Items may be dicts or objects exposing
	            ``role``, ``content`` and ``metadata`` attributes.
	        exclude_last_user: When True and the last message is from the user,
	            that message is left out (the caller adds the new user turn itself).

	    Returns:
	        A new list of ``{"role", "content"}`` dicts. Assistant messages that
	        carry a metadata title (the "thinking" bubbles this app creates) and
	        messages without text are skipped; consecutive messages with the same
	        role are merged, separated by a blank line. ``messages`` is not modified.
	    """
	    def _field(message, key):
	        # History items arrive as dicts from the chatbot component, but the
	        # handlers in this file also append message objects.
	        if isinstance(message, dict):
	            return message.get(key)
	        return getattr(message, key, None)

	    pending = list(messages)
	    if exclude_last_user and pending and _field(pending[-1], "role") == "user":
	        pending.pop()

	    formatted_history = []
	    for message in pending:
	        role = _field(message, "role")
	        content = _field(message, "content")

	        # Only a titled metadata marks a thinking bubble; a metadata dict
	        # whose title is empty belongs to an ordinary reply and is kept.
	        if role == "assistant" and _field(_field(message, "metadata"), "title"):
	            continue
	        # Non-text content (None, files, components) cannot go into the prompt
	        if not isinstance(content, str):
	            continue
	        content = content.strip()
	        if not content:
	            continue

	        if formatted_history and formatted_history[-1]["role"] == role:
	            formatted_history[-1]["content"] += "\n\n" + content
	        else:
	            formatted_history.append({"role": role, "content": content})

	    return formatted_history

	def _extract_tag_content(text: str, open_tag: str, close_tag: str) -> tuple[str, bool]:
	    """Return the stripped text that follows the first *open_tag* in *text*.

	    The second element tells whether *close_tag* is present; when it is, the
	    text is cut at the first *close_tag*. The caller must make sure that
	    *open_tag* occurs in *text*.
	    """
	    start = text.find(open_tag) + len(open_tag)
	    end = text.find(close_tag)
	    if end == -1:
	        return text[start:].strip(), False
	    return text[start:end].strip(), True

	def stream_iberotales_response(
	    user_message: str,
	    messages: list,
	    system_message: str = DEFAULT_SYSTEM_MESSAGE,
	    max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
	    temperature: float = 0.7,
	    top_p: float = 0.95,
	    top_k: int = 50,
	    repetition_penalty: float = 1.2,
	) -> Iterator[list]:
	    """Generate the assistant reply and stream it into the chat history.

	    The prompt is built from the system message, the formatted history and
	    ``user_message``. While the model streams text, the content between
	    ``<think>`` tags is shown in a titled "thinking" message and the content
	    between ``<SOLUTION>`` tags in a regular assistant message. If the model
	    never emits ``<SOLUTION>``, the text outside the thinking block is shown
	    as the answer once generation ends.

	    Args:
	        user_message: Text of the new user turn (this parameter shadows the
	            module-level ``user_message`` function inside this body).
	        messages: Chat history; new assistant messages are appended to this
	            same list in place.
	        system_message: System prompt; omitted from the conversation when blank.
	        max_new_tokens: Passed to the model as ``max_tokens``.
	        temperature, top_p, top_k: Sampling parameters passed to the model.
	        repetition_penalty: Passed to the model as ``repeat_penalty``.

	    Yields:
	        ``messages`` after every visible update.

	    Any exception is caught and reported as an assistant message.
	    """
	    if model is None or tokenizer is None:
	        messages.append(ChatMessage(role="assistant", content="Error: Modelo no disponible."))
	        yield messages
	        return

	    try:
	        conversation = []
	        if system_message.strip():
	            conversation.append({"role": "system", "content": system_message.strip()})
	        conversation.extend(format_chat_history(messages, exclude_last_user=True))
	        conversation.append({"role": "user", "content": user_message})

	        # Validate alternation: two consecutive turns with the same
	        # non-system role are rejected before the chat template is applied.
	        for previous, current in zip(conversation, conversation[1:]):
	            if current["role"] == previous["role"] and previous["role"] != "system":
	                messages.append(ChatMessage(role="assistant", content="Error: Reinicia la conversación."))
	                yield messages
	                return

	        prompt = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
	        # NOTE(review): keyword names and chunk layout follow the llama-cpp
	        # completion API; confirm this call also works for the Transformers
	        # model that load_model() stores when CUDA is available.
	        response = model(
	            prompt,
	            max_tokens=max_new_tokens,
	            temperature=temperature,
	            top_p=top_p,
	            top_k=top_k,
	            repeat_penalty=repetition_penalty,
	            stream=True,
	        )

	        thinking_title = "🤔 Pensando..."
	        full_response = ""
	        thinking_index = None
	        solution_index = None
	        thinking_complete = False

	        for chunk in response:
	            choice = chunk["choices"][0]
	            # Chunks that carry a finish reason are not added to the text
	            if choice["finish_reason"] is not None:
	                continue
	            full_response += choice["text"]

	            # Thinking block: refresh the bubble until </think> has been seen
	            if "<think>" in full_response and not thinking_complete:
	                if thinking_index is None:
	                    messages.append(ChatMessage(
	                        role="assistant",
	                        content="",
	                        metadata={"title": thinking_title},
	                    ))
	                    thinking_index = len(messages) - 1

	                current_thinking, thinking_complete = _extract_tag_content(
	                    full_response, "<think>", "</think>"
	                )
	                messages[thinking_index] = ChatMessage(
	                    role="assistant",
	                    content=current_thinking,
	                    metadata={"title": thinking_title},
	                )
	                yield messages

	            # Solution block: the message is created as soon as the tag opens
	            # but only refreshed once there is text to show
	            if "<SOLUTION>" in full_response:
	                if solution_index is None:
	                    messages.append(ChatMessage(role="assistant", content=""))
	                    solution_index = len(messages) - 1

	                current_solution, _ = _extract_tag_content(
	                    full_response, "<SOLUTION>", "</SOLUTION>"
	                )
	                if current_solution:
	                    messages[solution_index] = ChatMessage(
	                        role="assistant",
	                        content=current_solution,
	                    )
	                    yield messages

	        # Unformatted reply: no <SOLUTION> tag was produced, so show the text
	        # that lies outside the reasoning. An unclosed <think> block is removed
	        # as well, because its text is already shown in the thinking bubble.
	        if solution_index is None:
	            clean_response = re.sub(
	                r"<think>.*?(?:</think>|\Z)", "", full_response, flags=re.DOTALL
	            ).strip()
	            if clean_response:
	                messages.append(ChatMessage(role="assistant", content=clean_response))
	                yield messages

	    except Exception as e:
	        messages.append(ChatMessage(role="assistant", content=f"Error: {e}"))
	        yield messages

	def user_message(msg: str, history: list) -> tuple[str, list]:
	    """Append the user's message to the history.

	    Returns an empty string together with the same ``history`` list, which is
	    extended in place.
	    """
	    new_entry = ChatMessage(role="user", content=msg)
	    history.append(new_entry)
	    cleared_input = ""
	    return cleared_input, history

	def actualizar_personajes(pais_seleccionado):
	    """Build the gallery items for the selected country.

	    Stores the country's characters in the module-level ``current_personajes``
	    and returns ``(items, title)``, where every item is an
	    ``(image, "name: description")`` pair. Unknown countries yield an empty
	    gallery and a hint message.
	    """
	    # NOTE(review): current_personajes is module-level, so every caller of
	    # this function overwrites the same list -- confirm that is acceptable
	    # when several sessions use the app at once.
	    global current_personajes
	    current_personajes = PERSONAJES_POR_PAIS.get(pais_seleccionado, [])

	    if not current_personajes:
	        return [], "Selecciona un país para ver sus personajes"

	    # Inline SVG used in place of any image file that is missing on disk
	    placeholder = "data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMTAwIiBoZWlnaHQ9IjEwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cmVjdCB3aWR0aD0iMTAwIiBoZWlnaHQ9IjEwMCIgZmlsbD0iI2NjYyIvPjx0ZXh0IHg9IjUwIiB5PSI1MCIgZm9udC1mYW1pbHk9IkFyaWFsIiBmb250LXNpemU9IjEyIiBmaWxsPSIjNjY2IiB0ZXh0LWFuY2hvcj0ibWlkZGxlIiBkeT0iLjNlbSI+SW1hZ2VuPC90ZXh0Pjwvc3ZnPg=="

	    galeria = [
	        (
	            p["imagen"] if os.path.exists(p["imagen"]) else placeholder,
	            f"{p['nombre']}: {p['descripcion']}",
	        )
	        for p in current_personajes
	    ]
	    return galeria, f"Personajes de {pais_seleccionado}"

	def crear_prompt_desde_personaje(evt: gr.SelectData):
	    """Build a story prompt for the character selected in the gallery.

	    ``evt.index`` is looked up in the module-level ``current_personajes``.
	    A generic prompt is returned when the index is missing or out of range,
	    or when anything raises (the error is printed).
	    """
	    generic_prompt = "Crea una historia sobre un personaje mítico"
	    try:
	        selected = evt.index
	        in_range = selected is not None and selected < len(current_personajes)
	        if not in_range:
	            return generic_prompt
	        personaje = current_personajes[selected]
	        # TODO: change the dataset to use "cuenta" instead of "crea"
	        return f"Crea una historia sobre {personaje['nombre']}, {personaje['descripcion']}"
	    except Exception as e:
	        print(f"Error al crear prompt: {e}")
	        return generic_prompt

	# Wrap the generator with spaces.GPU only when running inside a Hugging Face
	# Space, with the `spaces` package importable and a CUDA device present
	_needs_spaces_gpu = IS_HF_SPACE and SPACES_AVAILABLE and torch.cuda.is_available()
	if _needs_spaces_gpu:
	    stream_iberotales_response = spaces.GPU(stream_iberotales_response)

	# Custom CSS: page width, the single-column character gallery and the header
	custom_css = """
	.gradio-container {
	max-width: 1400px !important;
	margin: auto;
	padding-top: 1.5rem;
	}
	#galeria .grid-wrap {
	max-height: 350px;
	overflow-y: auto;
	}
	#galeria .grid-container {
	grid-template-columns: repeat(1, 1fr) !important;
	gap: 0.5rem;
	}
	#galeria .thumbnail-item {
	aspect-ratio: 1;
	max-height: 100px;
	}
	#galeria .thumbnail-item img {
	object-fit: cover;
	width: 100%;
	height: 100%;
	border-radius: 8px;
	}
	.header-info {
	background: linear-gradient(135deg, #2c3e50 0%, #1a1a2e 100%);
	color: white;
	padding: 1rem;
	border-radius: 12px;
	margin-bottom: 1rem;
	text-align: center;
	}
	"""

	# Build the interface
	# NOTE(review): the nesting of the layout blocks below was reconstructed from
	# the `scale` hints because the original indentation was lost -- confirm it
	# against the intended layout.
	with gr.Blocks(fill_height=True, title="Iberotales", css=custom_css) as demo:
	    # Header with project information
	    with gr.Row():
	        with gr.Column():
	            gr.HTML("""
	<div class="header-info">
	<h1>📚 Iberotales</h1>
	<p><strong>Autor:</strong> David Quispe  |  <a href="https://github.com/mcdaqc/Iberotales" target="_blank" style="text-decoration: none;">GitHub</a>  |  <a href="https://huggingface.co/somosnlp-hackathon-2025/iberotales-gemma-3-1b-it-es" target="_blank" style="text-decoration: none;">Modelo</a>  |  <a href="https://huggingface.co/somosnlp-hackathon-2025/iberotales-gemma-3-1b-it-es-finetune-gguf" target="_blank" style="text-decoration: none;">GGUF</a></p>
	<p><em>Alineando modelos de lenguaje con la narrativa de mitos y leyendas de Iberoamérica.</em></p>
	<p><em>Hackathon SomosNLP 2025</em></p>
	</div>
	""")

	    with gr.Row():
	        # Left panel - character "Pokédex"
	        with gr.Column(scale=1, min_width=320):
	            gr.Markdown("### 🗃️ Pokédex de Personajes")

	            pais_dropdown = gr.Dropdown(
	                choices=list(PERSONAJES_POR_PAIS.keys()),
	                value="🇦🇷 Argentina",
	                label="País",
	                container=False,
	            )

	            galeria_personajes = gr.Gallery(
	                value=[],
	                label="Personajes",
	                show_label=False,
	                elem_id="galeria",
	                columns=1,
	                rows=4,
	                height=350,
	                object_fit="cover",
	                preview=False,  # Keeps the gallery from expanding automatically
	            )

	        # Right panel - chat
	        with gr.Column(scale=2):
	            chatbot = gr.Chatbot(
	                type="messages",
	                show_label=False,
	                height=400,
	                avatar_images=(None, "🏛️"),
	            )

	            with gr.Row():
	                input_box = gr.Textbox(
	                    placeholder="Escribe tu historia o selecciona un personaje...",
	                    show_label=False,
	                    scale=4,
	                    container=False,
	                )
	                send_button = gr.Button("📤", scale=1, variant="primary")

	            with gr.Row():
	                clear_button = gr.Button("🗑️ Limpiar", scale=1, size="sm")

	                with gr.Column(scale=3):
	                    with gr.Row():
	                        max_tokens = gr.Slider(100, MAX_MAX_NEW_TOKENS, DEFAULT_MAX_NEW_TOKENS, label="Tokens", container=False)
	                        temperature = gr.Slider(0.1, 2.0, 0.7, label="Temp", container=False)

	    # State: the message being answered, kept apart from the input box so the
	    # box can be cleared before generation starts
	    msg_store = gr.State("")

	    # Hidden sink for the title string returned by actualizar_personajes
	    galeria_titulo = gr.Textbox(visible=False)

	    # Event handlers
	    def submit_message(msg, history):
	        """Append a non-blank message to the history.

	        Returns ``("", history)`` after appending, or ``(msg, history)``
	        unchanged when the message is blank.
	        """
	        if not msg.strip():
	            return msg, history
	        return "", user_message(msg, history)[1]

	    def stage_message(msg, history):
	        """Return the new input-box value, the message to answer and the history."""
	        input_value, updated_history = submit_message(msg, history)
	        return input_value, msg, updated_history

	    def generate_response(msg, history, max_tok, temp):
	        """Stream the model reply for ``msg``; blank messages generate nothing."""
	        if not msg or not msg.strip():
	            yield history
	            return
	        yield from stream_iberotales_response(msg, history, DEFAULT_SYSTEM_MESSAGE, int(max_tok), temp)

	    # Refresh the gallery when the country changes
	    pais_dropdown.change(
	        fn=actualizar_personajes,
	        inputs=[pais_dropdown],
	        outputs=[galeria_personajes, galeria_titulo],
	    )

	    # Load the initial characters
	    demo.load(
	        fn=actualizar_personajes,
	        inputs=[pais_dropdown],
	        outputs=[galeria_personajes, galeria_titulo],
	    )

	    # Fill the input box with a prompt when a gallery item is selected
	    galeria_personajes.select(
	        fn=crear_prompt_desde_personaje,
	        outputs=[input_box],
	    )

	    # Sending a message: pressing Enter and clicking the button share one chain
	    for send_trigger in (input_box.submit, send_button.click):
	        send_trigger(
	            stage_message,
	            inputs=[input_box, chatbot],
	            outputs=[input_box, msg_store, chatbot],
	            queue=False,
	        ).then(
	            generate_response,
	            inputs=[msg_store, chatbot, max_tokens, temperature],
	            outputs=chatbot,
	        )

	    clear_button.click(
	        lambda: ([], "", ""),
	        outputs=[chatbot, input_box, msg_store],
	        queue=False,
	    )

	# Launch the application, but only when the model was loaded
	if __name__ == "__main__":
	    if not model_loaded:
	        print("Error al cargar el modelo.")
	    else:
	        demo.launch(share=False, show_error=True)