Bbxbzbdbdfdfd / app.py
Kfjjdjdjdhdhd's picture
Update app.py
ef673b7 verified
import os
import logging
from flask import Flask, render_template_string, send_file, abort
from huggingface_hub import hf_hub_download, login as hf_login
from dotenv import load_dotenv
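# Load environment variables (e.g. HF_TOKEN) from a local .env file, if one is present.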
load_dotenv()
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    try:
        hf_login(token=hf_token)
        logger.info("Hugging Face login succeeded with the provided token.")
    except Exception as e:
        logger.error(f"Error during Hugging Face login: {e}")
else:
    logger.warning("HF_TOKEN is not set; skipping Hugging Face login.")
app = Flask(__name__)
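# Model configuration: the LiteRT Gemma 3 1B-IT ".task" bundle is downloaded from the
# Hugging Face Hub into the working directory and served to the browser via /download.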
MODEL_FILENAME = 'gemma3-1b-it-int4.task'
HUGGINGFACE_REPO = 'litert-community/Gemma3-1B-IT'
MODEL_LOCAL_PATH = os.path.join(os.getcwd(), MODEL_FILENAME)
def download_model_file():
    """Download the model file from the Hugging Face Hub if it is not already present locally."""
    if not os.path.exists(MODEL_LOCAL_PATH):
        logger.info("Model file not found locally. Starting download from Hugging Face into the local folder...")
        try:
            hf_hub_download(
                repo_id=HUGGINGFACE_REPO,
                filename=MODEL_FILENAME,
                local_dir=".",
                local_dir_use_symlinks=False,
            )
            logger.info(f"Download complete: {MODEL_LOCAL_PATH}")
        except Exception as e:
            logger.error(f"Error downloading the model file: {e}")
            raise
    else:
        logger.info("The model file already exists locally.")
    return MODEL_LOCAL_PATH
# Fetch the model once at startup so the /download route can serve it immediately.
model_file_path = download_model_file()
logger.info(f"Model file path: {model_file_path}")
HTML_CONTENT = """<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>LLM Inference Web Demo</title>
<style>
body { font-family: Arial, sans-serif; margin: 20px; }
textarea { font-family: monospace; }
</style>
</head>
<body>
<h1>LLM Inference Demo</h1>
<p>Enter the input text and press "Get Response".</p>
<label for="input">Input:</label><br />
<textarea id="input" style="height: 300px; width: 600px"></textarea><br />
<input type="button" id="submit" value="Get Response" disabled /><br /><br />
<label for="output">Result:</label><br />
<textarea id="output" style="height: 300px; width: 600px"></textarea>
<div id="error-message" style="color: red;"></div>
<script type="module" src="/index.js"></script>
</body>
</html>
"""
JS_CONTENT = """import {FilesetResolver, LlmInference} from 'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai';
const input = document.getElementById('input');
const output = document.getElementById('output');
const submit = document.getElementById('submit');
const modelFileName = '/download';
const errorMessageDiv = document.getElementById('error-message');
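// Append streamed partial results to the output box; re-enable the button once generation completes.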
function displayPartialResults(partialResults, complete) {
output.textContent += partialResults;
if (complete) {
if (!output.textContent) {
output.textContent = 'Result is empty';
}
submit.disabled = false;
}
}
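// Load the MediaPipe GenAI fileset, create the LlmInference task from the model at /download,
// and wire up the submit button; initialization errors surface troubleshooting hints below.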
async function runDemo() {
const genaiFileset = await FilesetResolver.forGenAiTasks(
'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai/wasm');
let llmInference;
submit.onclick = () => {
output.textContent = '';
submit.disabled = true;
llmInference.generateResponse(input.value, displayPartialResults);
};
submit.value = 'Loading the model...';
try {
LlmInference
.createFromOptions(genaiFileset, {
baseOptions: {modelAssetPath: modelFileName},
})
.then(llm => {
llmInference = llm;
submit.disabled = false;
submit.value = 'Get Response';
})
.catch(error => {
console.error("Error initializing LlmInference:", error);
errorMessageDiv.innerHTML = `Failed to initialize the task due to WebGPU limitations. Details: ${error} <br><br>` +
`**Troubleshooting Steps:**<br>` +
`1. **Verify WebGPU Support:**<br>` +
` - **Browser:** Use **Google Chrome** or **Microsoft Edge (latest versions).**<br>` +
` - **Check chrome://gpu (or edge://gpu):** In your browser address bar, type \`chrome://gpu\` (or \`edge://gpu\`) and press Enter. Look for "WebGPU" section. <br>` +
` - **Status:** Should say "Hardware accelerated". If disabled or "Software only", WebGPU is not working correctly.<br>` +
` - **maxStorageBufferBindingSize:** Verify the reported value. If it's very low, your GPU/browser might be too limited.<br>` +
` - **Enable WebGPU Flags (if needed):** In chrome://flags (or edge://flags), search for "WebGPU" and try enabling flags like \`#enable-unsafe-webgpu\` and restart browser.<br><br>` +
`2. **Update Browser and GPU Drivers:**<br>` +
` - **Browser:** Update Chrome/Edge to the latest version.<br>` +
` - **GPU Drivers:** Download and install the latest drivers from NVIDIA, AMD, or Intel websites for your specific GPU and operating system. **Restart your computer after driver install.**<br><br>` +
`3. **Restart Your Computer:** A simple restart can resolve temporary issues.<br><br>` +
`4. **Try a Different Browser/Computer:** Test with a different WebGPU-compatible browser (Chrome/Edge) or on a different computer with a more capable GPU if possible.<br><br>` +
`5. **Check GPU Compatibility:** Older or very low-end GPUs might have limited WebGPU support.<br><br>` +
`If the issue persists after these steps, your GPU or browser may have inherent limitations for running this LLM demo in WebGPU.`;
submit.disabled = true;
submit.value = 'Failed to load model';
});
} catch (e) {
console.error("Error during LlmInference setup:", e);
errorMessageDiv.textContent = `Failed to set up LlmInference. Details: ${e}`;
submit.disabled = true;
submit.value = 'Failed to load model';
}
}
runDemo();
"""
@app.route('/')
def index():
    return render_template_string(HTML_CONTENT)

@app.route('/index.js')
def serve_js():
    return JS_CONTENT, 200, {'Content-Type': 'application/javascript'}

@app.route('/download')
def download_file():
    logger.info(f"Request to download the model from: {model_file_path}")
    if os.path.exists(model_file_path):
        return send_file(model_file_path)
    else:
        logger.error(f"Model file not found at: {model_file_path}")
        abort(404, description="Model file not found.")

if __name__ == '__main__':
    logger.info("Starting the Flask app on port 7860")
    app.run(debug=True, host="0.0.0.0", port=7860)