Bbxbzbdbdfdfd / app.py
Kfjjdjdjdhdhd's picture
Update app.py
ef673b7 verified
import os
import logging
from flask import Flask, render_template_string, send_file, abort
from huggingface_hub import hf_hub_download, login as hf_login
from dotenv import load_dotenv
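# Load environment variables (e.g. HF_TOKEN) from a local .env file, if one is present.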
load_dotenv()
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    try:
        hf_login(token=hf_token)
        logger.info("Hugging Face login succeeded with the provided token.")
    except Exception as e:
        logger.error(f"Error during Hugging Face login: {e}")
else:
    logger.warning("HF_TOKEN is not set; skipping Hugging Face login.")
app = Flask(__name__)
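# Model configuration: the LiteRT Gemma 3 1B-IT ".task" bundle is downloaded from the
# Hugging Face Hub into the working directory and served to the browser via /download.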
MODEL_FILENAME = 'gemma3-1b-it-int4.task'
HUGGINGFACE_REPO = 'litert-community/Gemma3-1B-IT'
MODEL_LOCAL_PATH = os.path.join(os.getcwd(), MODEL_FILENAME)
def download_model_file():
    """Download the model file from the Hugging Face Hub if it is not already present locally."""
    if not os.path.exists(MODEL_LOCAL_PATH):
        logger.info("Model file not found locally. Starting download from Hugging Face into the local folder...")
        try:
            hf_hub_download(
                repo_id=HUGGINGFACE_REPO,
                filename=MODEL_FILENAME,
                local_dir=".",
                local_dir_use_symlinks=False,
            )
            logger.info(f"Download complete: {MODEL_LOCAL_PATH}")
        except Exception as e:
            logger.error(f"Error downloading the model file: {e}")
            raise
    else:
        logger.info("The model file already exists locally.")
    return MODEL_LOCAL_PATH
# Fetch the model once at startup so the /download route can serve it immediately.
model_file_path = download_model_file()
logger.info(f"Model file path: {model_file_path}")
HTML_CONTENT = """<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>LLM Inference Web Demo</title>
<style>
body { font-family: Arial, sans-serif; margin: 20px; }
textarea { font-family: monospace; }
</style>
</head>
<body>
<h1>LLM Inference Demo</h1>
<p>Enter the input text and press "Get Response".</p>
<label for="input">Input:</label><br />
<textarea id="input" style="height: 300px; width: 600px"></textarea><br />
<input type="button" id="submit" value="Get Response" disabled /><br /><br />
<label for="output">Result:</label><br />
<textarea id="output" style="height: 300px; width: 600px"></textarea>
<div id="error-message" style="color: red;"></div>
<script type="module" src="/index.js"></script>
</body>
</html>
"""
JS_CONTENT = """import {FilesetResolver, LlmInference} from 'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai';
const input = document.getElementById('input');
const output = document.getElementById('output');
const submit = document.getElementById('submit');
const modelFileName = '/download';
const errorMessageDiv = document.getElementById('error-message');
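// Append streamed partial results to the output box; re-enable the button once generation completes.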
function displayPartialResults(partialResults, complete) {
output.textContent += partialResults;
if (complete) {
if (!output.textContent) {
output.textContent = 'Result is empty';
}
submit.disabled = false;
}
}
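// Load the MediaPipe GenAI fileset, create the LlmInference task from the model at /download,
// and wire up the submit button; initialization errors surface troubleshooting hints below.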
async function runDemo() {
const genaiFileset = await FilesetResolver.forGenAiTasks(
'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai/wasm');
let llmInference;
submit.onclick = () => {
output.textContent = '';
submit.disabled = true;
llmInference.generateResponse(input.value, displayPartialResults);
};
submit.value = 'Loading the model...';
try {
LlmInference
.createFromOptions(genaiFileset, {
baseOptions: {modelAssetPath: modelFileName},
})
.then(llm => {
llmInference = llm;
submit.disabled = false;
submit.value = 'Get Response';
})
.catch(error => {
console.error("Error initializing LlmInference:", error);
errorMessageDiv.innerHTML = `Failed to initialize the task due to WebGPU limitations. Details: ${error} <br><br>` +
`**Troubleshooting Steps:**<br>` +
`1. **Verify WebGPU Support:**<br>` +
` - **Browser:** Use **Google Chrome** or **Microsoft Edge (latest versions).**<br>` +
` - **Check chrome://gpu (or edge://gpu):** In your browser address bar, type \`chrome://gpu\` (or \`edge://gpu\`) and press Enter. Look for "WebGPU" section. <br>` +
` - **Status:** Should say "Hardware accelerated". If disabled or "Software only", WebGPU is not working correctly.<br>` +
` - **maxStorageBufferBindingSize:** Verify the reported value. If it's very low, your GPU/browser might be too limited.<br>` +
` - **Enable WebGPU Flags (if needed):** In chrome://flags (or edge://flags), search for "WebGPU" and try enabling flags like \`#enable-unsafe-webgpu\` and restart browser.<br><br>` +
`2. **Update Browser and GPU Drivers:**<br>` +
` - **Browser:** Update Chrome/Edge to the latest version.<br>` +
` - **GPU Drivers:** Download and install the latest drivers from NVIDIA, AMD, or Intel websites for your specific GPU and operating system. **Restart your computer after driver install.**<br><br>` +
`3. **Restart Your Computer:** A simple restart can resolve temporary issues.<br><br>` +
`4. **Try a Different Browser/Computer:** Test with a different WebGPU-compatible browser (Chrome/Edge) or on a different computer with a more capable GPU if possible.<br><br>` +
`5. **Check GPU Compatibility:** Older or very low-end GPUs might have limited WebGPU support.<br><br>` +
`If the issue persists after these steps, your GPU or browser may have inherent limitations for running this LLM demo in WebGPU.`;
submit.disabled = true;
submit.value = 'Failed to load model';
});
} catch (e) {
console.error("Error during LlmInference setup:", e);
errorMessageDiv.textContent = `Failed to set up LlmInference. Details: ${e}`;
submit.disabled = true;
submit.value = 'Failed to load model';
}
}
runDemo();
"""
@app.route('/')
def index():
    return render_template_string(HTML_CONTENT)

@app.route('/index.js')
def serve_js():
    return JS_CONTENT, 200, {'Content-Type': 'application/javascript'}

@app.route('/download')
def download_file():
    logger.info(f"Request to download the model from: {model_file_path}")
    if os.path.exists(model_file_path):
        return send_file(model_file_path)
    else:
        logger.error(f"Model file not found at: {model_file_path}")
        abort(404, description="Model file not found.")

if __name__ == '__main__':
    logger.info("Starting the Flask app on port 7860")
    app.run(debug=True, host="0.0.0.0", port=7860)