Spaces:

Wh1plashR
/

AppTry

Sleeping

App Files Files Community

AppTry / app.py

Wh1plashR

Update app.py

fa550dd verified 4 months ago

raw

history blame

3.48 kB

	import os
	import gradio as gr
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer
	from huggingface_hub import snapshot_download, hf_hub_download

	# ── 1) download the “full” Instruct repo (config.json, tokenizer files, etc.) ────
	# The folder returned here is reused below as the single directory handed to
	# both the tokenizer loader and the model loader.
	instruct_repo = "Qwen/Qwen2.5-0.5B-Instruct"
	local_dir = snapshot_download(repo_id=instruct_repo)

	# ── 2) download the GGUF weights into that same folder ───────────────────────────
	gguf_filename = "qwen2.5-0.5b-instruct-q5_k_m.gguf"
	hf_hub_download(
	    repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
	    filename=gguf_filename,
	    local_dir=local_dir,
	    # NOTE(review): newer huggingface_hub releases appear to deprecate this
	    # flag — confirm against the version pinned for this Space.
	    local_dir_use_symlinks=False,
	)

	# Sanity check. Raised explicitly instead of using `assert`, because assert
	# statements are removed when Python runs with -O and the check would vanish.
	gguf_path = os.path.join(local_dir, gguf_filename)
	if not os.path.isfile(gguf_path):
	    raise FileNotFoundError(f"GGUF not found at {gguf_path}")

	# ── 3) load tokenizer from the combined folder ──────────────────────────────────
	tokenizer = AutoTokenizer.from_pretrained(
	    local_dir,
	    trust_remote_code=True,
	)

	# ── 4) load the causal‑LM model, pointing at the GGUF file ──────────────────────
	model = AutoModelForCausalLM.from_pretrained(
	    local_dir,
	    gguf_file=gguf_filename,  # relative to local_dir
	    device_map="auto",
	    trust_remote_code=True,
	)
	# PyTorch 2.x compile, intended as a speedup.
	# NOTE(review): the size of the gain is unmeasured here, and generate() may be
	# resolved on the wrapped module rather than the compiled one — verify.
	model = torch.compile(model)

	# ── Prompt template ─────────────────────────────────────────────────────────────
	# Instruction preamble for the model. generate_recommendation() builds its
	# prompt by concatenating this text, the user's appliance summary, and a
	# trailing "Recommendations:" cue.
	# Everything between the triple quotes is runtime text, including the line
	# break right after the opening quotes and the whitespace before the closing
	# ones, so edit it with care.
	prompt_prefix = """
	You are an energy‑saving expert tasked to help households reduce their monthly electricity bills.
	Given the user's appliance usage information (device name, wattage, hours used per day, days used per week):
	1. Flag the highest energy consumers.
	2. Recommend practical, empathetic, achievable actions.
	3. Suggest appliance swaps (e.g. LED, inverter AC) and habit changes.
	Format with bullet points.
	Here is the summary:
	"""

	# ── Generation function ─────────────────────────────────────────────────────────
	def generate_recommendation(appliance_info: str) -> str:
	    """Generate energy-saving recommendations for an appliance usage summary.

	    The summary is appended to the module-level ``prompt_prefix``, followed by
	    a "Recommendations:" cue, and completed by the module-level ``model``
	    using greedy decoding (``do_sample=False``).

	    Args:
	        appliance_info: Free-form text describing the user's appliance usage.

	    Returns:
	        Only the newly generated text, with special tokens removed and
	        surrounding whitespace stripped.
	    """
	    prompt = prompt_prefix + appliance_info + "\n\nRecommendations:"
	    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
	    # Remember how many tokens belong to the prompt so they can be dropped
	    # from the output sequence, which starts with the prompt itself.
	    prompt_length = inputs["input_ids"].shape[-1]
	    with torch.no_grad():
	        outputs = model.generate(
	            **inputs,
	            max_new_tokens=120,
	            use_cache=True,
	            # Greedy decoding: no temperature is passed, since sampling
	            # parameters are not used when do_sample is False.
	            do_sample=False,
	        )
	    # Slice off the prompt tokens instead of splitting the decoded text on
	    # "Recommendations:"; a text split would cut the answer short whenever
	    # the model itself repeats that marker.
	    new_tokens = outputs[0][prompt_length:]
	    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

	# ── Gradio interface ────────────────────────────────────────────────────────────
	# Eight-line text box in which the user enters the appliance usage summary.
	usage_box = gr.Textbox(
	    placeholder="e.g. Refrigerator: 150 W, 8 h/day, 7 days/week\n...",
	    lines=8,
	)

	# Text-in / text-out UI that routes the box contents through
	# generate_recommendation and shows the returned string.
	iface = gr.Interface(
	    title="Energy‑Saving Tips (Qwen2.5‑0.5B‑Instruct‑GGUF)",
	    description="Provide your appliance usage summary to get targeted, GGUF‑powered energy‑saving recommendations.",
	    fn=generate_recommendation,
	    inputs=usage_box,
	    outputs="text",
	)

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()