import os
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import snapshot_download, hf_hub_download
# Download model files
def setup_model():
    # Fetch the Instruct repo (tokenizer + config) and the quantized GGUF weights
    instruct_repo = "Qwen/Qwen2.5-0.5B-Instruct"
    local_dir = snapshot_download(repo_id=instruct_repo)

    gguf_filename = "qwen2.5-0.5b-instruct-q5_k_m.gguf"
    hf_hub_download(
        repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
        filename=gguf_filename,
        local_dir=local_dir,
        local_dir_use_symlinks=False
    )

    tokenizer = AutoTokenizer.from_pretrained(local_dir, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        local_dir,
        gguf_file=gguf_filename,
        trust_remote_code=True
    )
    # torch.compile gives faster repeated generate() calls after a one-time warm-up
    return tokenizer, torch.compile(model)

tokenizer, model = setup_model()
prompt_prefix = (
    "You are the best energy-saving advisor. "
    "Given appliances (name, wattage, hours/day, days/week), identify the top consumers "
    "and give up to 5 actionable bullet-point recommendations (practical, empathetic), "
    "including appliance swaps and habit changes. "
    "For each, include estimated monthly kWh saved and cost reduction. "
    "Keep the response under 120 tokens, bullets only."
    "\nSummary:\n"
)
# Generation function
def generate_recommendation(appliance_info: str) -> str:
    prompt = prompt_prefix + appliance_info + "\n\nRecommendations:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # Greedy decoding (do_sample=False); temperature is ignored in that mode,
        # so it is omitted to avoid a transformers warning
        outputs = model.generate(
            **inputs,
            max_new_tokens=120,
            return_dict_in_generate=False,
            do_sample=False
        )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Return only the text generated after the "Recommendations:" marker
    return text.split("Recommendations:")[-1].strip()
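
# The listing imports gradio, but the UI wiring is not shown in this excerpt.
# A minimal sketch, assuming generate_recommendation is exposed through a simple
# text-in/text-out gr.Interface (the labels and title below are illustrative,
# not taken from the source):
demo = gr.Interface(
    fn=generate_recommendation,
    inputs=gr.Textbox(lines=8, label="Appliances (name, wattage, hours/day, days/week)"),
    outputs=gr.Textbox(label="Energy-saving recommendations"),
    title="Energy-Saving Advisor"
)

if __name__ == "__main__":
    demo.launch()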