import os
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import snapshot_download, hf_hub_download

# Download model files: the instruct repo supplies the tokenizer and config,
# and the GGUF repo supplies the quantized weights.
def setup_model():
    instruct_repo = "Qwen/Qwen2.5-0.5B-Instruct"
    local_dir = snapshot_download(repo_id=instruct_repo)

    gguf_filename = "qwen2.5-0.5b-instruct-q5_k_m.gguf"
    # local_dir_use_symlinks is deprecated (and ignored) in recent
    # huggingface_hub releases, so it is omitted here.
    hf_hub_download(
        repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
        filename=gguf_filename,
        local_dir=local_dir,
    )

    tokenizer = AutoTokenizer.from_pretrained(local_dir, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        local_dir,
        gguf_file=gguf_filename,
        trust_remote_code=True,
    )
    # torch.compile adds a one-time warm-up cost, then speeds up repeated calls.
    return tokenizer, torch.compile(model)

tokenizer, model = setup_model()

prompt_prefix = (
    "You are the best energy-saving advisor. "
    "Given appliances (name, wattage, hours/day, days/week), identify the top "
    "consumers and give up to 5 actionable bullet-point recommendations "
    "(practical, empathetic), including appliance swaps and habit changes. "
    "For each, include the estimated monthly kWh saved and cost reduction. "
    "Keep the response under 120 tokens, bullets only."
    "\nAppliances:\n"
)

# Generation function
def generate_recommendation(appliance_info: str) -> str:
    prompt = prompt_prefix + appliance_info + "\n\nRecommendations:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # Greedy decoding: with do_sample=False the temperature setting is
        # ignored (temperature=0.0 only triggers a warning), so it is dropped.
        outputs = model.generate(
            **inputs,
            max_new_tokens=120,
            do_sample=False,
        )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Return only the text generated after the final "Recommendations:" marker.
    return text.split("Recommendations:")[-1].strip()
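
# `gradio` is imported above but no UI is wired up in this snippet. Below is a
# minimal sketch of how generate_recommendation could be exposed through a
# Gradio interface; the component labels, placeholder text, and launch settings
# are illustrative assumptions, not taken from the original.
demo = gr.Interface(
    fn=generate_recommendation,
    inputs=gr.Textbox(
        lines=6,
        label="Appliances",
        placeholder="e.g. Fridge, 150W, 24h/day, 7 days/week",
    ),
    outputs=gr.Textbox(label="Energy-saving recommendations"),
    title="Energy-Saving Advisor",
)

if __name__ == "__main__":
    demo.launch()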