import os
import logging
from flask import Flask, render_template_string, send_file, abort
from huggingface_hub import hf_hub_download, login as hf_login
from dotenv import load_dotenv

load_dotenv()

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Log in to Hugging Face with the token provided via the environment (.env file).
hf_token = os.getenv("HF_TOKEN")
try:
    hf_login(token=hf_token)
    logger.info("Hugging Face login succeeded with the provided token.")
except Exception as e:
    logger.error(f"Error during Hugging Face login: {e}")

app = Flask(__name__)

# Model artifact that will be served to the browser-side MediaPipe LLM Inference task.
MODEL_FILENAME = 'gemma3-1b-it-int4.task'
HUGGINGFACE_REPO = 'litert-community/Gemma3-1B-IT'
MODEL_LOCAL_PATH = os.path.join(os.getcwd(), MODEL_FILENAME)
def download_model_file():
    """Download the model file from Hugging Face if it is not already present locally."""
    if not os.path.exists(MODEL_LOCAL_PATH):
        logger.info("Model file not found locally. Starting download from Hugging Face into the local folder...")
        try:
            hf_hub_download(
                repo_id=HUGGINGFACE_REPO,
                filename=MODEL_FILENAME,
                local_dir=".",
                local_dir_use_symlinks=False
            )
            logger.info(f"Download completed: {MODEL_LOCAL_PATH}")
        except Exception as e:
            logger.error(f"Error downloading the model file: {e}")
            raise
    else:
        logger.info("The model file already exists locally.")
    return MODEL_LOCAL_PATH

model_file_path = download_model_file()
logger.info(f"Model file path: {model_file_path}")
HTML_CONTENT = """<!doctype html> | |
<html lang="en"> | |
<head> | |
<meta charset="UTF-8"> | |
<title>LLM Inference Web Demo</title> | |
<style> | |
body { font-family: Arial, sans-serif; margin: 20px; } | |
textarea { font-family: monospace; } | |
</style> | |
</head> | |
<body> | |
<h1>Demo de Inferencia LLM</h1> | |
<p>Ingresa el texto de entrada y presiona "Get Response".</p> | |
<label for="input">Input:</label><br /> | |
<textarea id="input" style="height: 300px; width: 600px"></textarea><br /> | |
<input type="button" id="submit" value="Get Response" disabled /><br /><br /> | |
<label for="output">Result:</label><br /> | |
<textarea id="output" style="height: 300px; width: 600px"></textarea> | |
<div id="error-message" style="color: red;"></div> | |
<script type="module" src="/index.js"></script> | |
</body> | |
</html> | |
""" | |
JS_CONTENT = """import {FilesetResolver, LlmInference} from 'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai'; | |
const input = document.getElementById('input'); | |
const output = document.getElementById('output'); | |
const submit = document.getElementById('submit'); | |
const modelFileName = '/download'; | |
const errorMessageDiv = document.getElementById('error-message'); | |
function displayPartialResults(partialResults, complete) { | |
output.textContent += partialResults; | |
if (complete) { | |
if (!output.textContent) { | |
output.textContent = 'Result is empty'; | |
} | |
submit.disabled = false; | |
} | |
} | |
async function runDemo() { | |
const genaiFileset = await FilesetResolver.forGenAiTasks( | |
'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai/wasm'); | |
let llmInference; | |
submit.onclick = () => { | |
output.textContent = ''; | |
submit.disabled = true; | |
llmInference.generateResponse(input.value, displayPartialResults); | |
}; | |
submit.value = 'Loading the model...'; | |
try { | |
LlmInference | |
.createFromOptions(genaiFileset, { | |
baseOptions: {modelAssetPath: modelFileName}, | |
}) | |
.then(llm => { | |
llmInference = llm; | |
submit.disabled = false; | |
submit.value = 'Get Response'; | |
}) | |
.catch(error => { | |
console.error("Error initializing LlmInference:", error); | |
errorMessageDiv.innerHTML = `Failed to initialize the task due to WebGPU limitations. Details: ${error} <br><br>` + | |
`**Troubleshooting Steps:**<br>` + | |
`1. **Verify WebGPU Support:**<br>` + | |
` - **Browser:** Use **Google Chrome** or **Microsoft Edge (latest versions).**<br>` + | |
` - **Check chrome://gpu (or edge://gpu):** In your browser address bar, type \`chrome://gpu\` (or \`edge://gpu\`) and press Enter. Look for "WebGPU" section. <br>` + | |
` - **Status:** Should say "Hardware accelerated". If disabled or "Software only", WebGPU is not working correctly.<br>` + | |
` - **maxStorageBufferBindingSize:** Verify the reported value. If it's very low, your GPU/browser might be too limited.<br>` + | |
` - **Enable WebGPU Flags (if needed):** In chrome://flags (or edge://flags), search for "WebGPU" and try enabling flags like \`#enable-unsafe-webgpu\` and restart browser.<br><br>` + | |
`2. **Update Browser and GPU Drivers:**<br>` + | |
` - **Browser:** Update Chrome/Edge to the latest version.<br>` + | |
` - **GPU Drivers:** Download and install the latest drivers from NVIDIA, AMD, or Intel websites for your specific GPU and operating system. **Restart your computer after driver install.**<br><br>` + | |
`3. **Restart Your Computer:** A simple restart can resolve temporary issues.<br><br>` + | |
`4. **Try a Different Browser/Computer:** Test with a different WebGPU-compatible browser (Chrome/Edge) or on a different computer with a more capable GPU if possible.<br><br>` + | |
`5. **Check GPU Compatibility:** Older or very low-end GPUs might have limited WebGPU support.<br><br>` + | |
`If the issue persists after these steps, your GPU or browser may have inherent limitations for running this LLM demo in WebGPU.`; | |
submit.disabled = true; | |
submit.value = 'Failed to load model'; | |
}); | |
} catch (e) { | |
console.error("Error during LlmInference setup:", e); | |
errorMessageDiv.textContent = `Failed to set up LlmInference. Details: ${e}`; | |
submit.disabled = true; | |
submit.value = 'Failed to load model'; | |
} | |
} | |
runDemo(); | |
""" | |
@app.route('/')
def index():
    return render_template_string(HTML_CONTENT)

@app.route('/index.js')
def serve_js():
    return JS_CONTENT, 200, {'Content-Type': 'application/javascript'}

@app.route('/download')
def download_file():
    logger.info(f"Request to download the model from: {model_file_path}")
    if os.path.exists(model_file_path):
        return send_file(model_file_path)
    else:
        logger.error(f"Model file not found at: {model_file_path}")
        abort(404, description="Model file not found.")

if __name__ == '__main__':
    logger.info("Starting the Flask application on port 7860")
    app.run(debug=True, host="0.0.0.0", port=7860)
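# Rough usage sketch (the module name "app.py" is an assumption, not given in the
# original): with HF_TOKEN set in .env, run `python app.py`, then open
# http://localhost:7860/ in a WebGPU-capable browser (recent Chrome or Edge) and
# press "Get Response" once the model has finished loading.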