import os
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import snapshot_download, hf_hub_download

# ── 1) download the "full" Instruct repo (config.json, tokenizer files, etc.) ────
instruct_repo = "Qwen/Qwen2.5-0.5B-Instruct"
local_dir = snapshot_download(repo_id=instruct_repo)

# ── 2) download the GGUF weights into that same folder ───────────────────────────
gguf_filename = "qwen2.5-0.5b-instruct-q5_k_m.gguf"
hf_hub_download(
    repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
    filename=gguf_filename,
    local_dir=local_dir,
    local_dir_use_symlinks=False   # deprecated (and ignored) on recent huggingface_hub releases; kept for older versions
)

# sanity check
gguf_path = os.path.join(local_dir, gguf_filename)
assert os.path.isfile(gguf_path), f"GGUF not found at {gguf_path}"

# ── 3) load tokenizer from the combined folder ──────────────────────────────────
tokenizer = AutoTokenizer.from_pretrained(
    local_dir,
    trust_remote_code=True
)

# ── 4) load the causal-LM model, pointing at the GGUF file ──────────────────────
model = AutoModelForCausalLM.from_pretrained(
    local_dir,
    gguf_file=gguf_filename,    # relative to local_dir
    device_map="auto",
    trust_remote_code=True
)
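# Note: when transformers loads a GGUF checkpoint it dequantizes the weights back to
# standard torch tensors, so runtime memory reflects the dequantized model rather than
# the quantized file size; the GGUF file mainly acts as a compact download format here.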
model = torch.compile(model)    # optional PyTorch 2.x compile; speedup varies by hardware and backend

# ── Prompt template ─────────────────────────────────────────────────────────────
prompt_prefix = """
You are an energy-saving expert tasked with helping households reduce their monthly electricity bills.
Given the user's appliance usage information (device name, wattage, hours used per day, days used per week):
1. Flag the highest energy consumers.
2. Recommend practical, empathetic, achievable actions.
3. Suggest appliance swaps (e.g. LED, inverter AC) and habit changes.
Format with bullet points.
Here is the summary:
"""

# ── Generation function ─────────────────────────────────────────────────────────
def generate_recommendation(appliance_info: str) -> str:
    prompt = prompt_prefix + appliance_info + "\n\nRecommendations:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=120,
            use_cache=True,
            do_sample=False   # greedy decoding; temperature is ignored (and triggers a warning) when do_sample=False
        )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return text.split("Recommendations:")[-1].strip()
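
# ── Optional smoke test (hypothetical sample input; uncomment to try without the UI) ──
# sample_usage = "Refrigerator: 150 W, 8 h/day, 7 days/week\nAir conditioner: 1000 W, 6 h/day, 5 days/week"
# print(generate_recommendation(sample_usage))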

# ── Gradio interface ────────────────────────────────────────────────────────────
iface = gr.Interface(
    fn=generate_recommendation,
    inputs=gr.Textbox(lines=8, placeholder="e.g. Refrigerator: 150 W, 8 h/day, 7 days/week\n..."),
    outputs="text",
    title="Energy‑Saving Tips (Qwen2.5‑0.5B‑Instruct‑GGUF)",
    description="Provide your appliance usage summary to get targeted, GGUF‑powered energy‑saving recommendations."
)

if __name__ == "__main__":
    iface.launch()
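
# Deployment note (assumption): on a Hugging Face Space, list gradio, torch, transformers,
# and huggingface_hub in requirements.txt; when run locally, Gradio serves the UI at
# http://127.0.0.1:7860 by default.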