import os
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import snapshot_download, hf_hub_download

# Download model files: the instruct repo supplies the tokenizer and config,
# and the GGUF repo supplies the quantized weights.
def setup_model():
    instruct_repo = "Qwen/Qwen2.5-0.5B-Instruct"
    local_dir = snapshot_download(repo_id=instruct_repo)

    gguf_filename = "qwen2.5-0.5b-instruct-q5_k_m.gguf"
    # local_dir_use_symlinks is deprecated (and ignored) in recent
    # huggingface_hub releases, so it is omitted here.
    hf_hub_download(
        repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
        filename=gguf_filename,
        local_dir=local_dir,
    )

    tokenizer = AutoTokenizer.from_pretrained(local_dir, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        local_dir,
        gguf_file=gguf_filename,
        trust_remote_code=True,
    )
    # torch.compile adds a one-time warm-up cost, then speeds up repeated calls.
    return tokenizer, torch.compile(model)

tokenizer, model = setup_model()

prompt_prefix = (
    "You are the best energy-saving advisor. "
    "Given appliances (name, wattage, hours/day, days/week), identify the top "
    "consumers and give up to 5 actionable bullet-point recommendations "
    "(practical, empathetic), including appliance swaps and habit changes. "
    "For each, include the estimated monthly kWh saved and cost reduction. "
    "Keep the response under 120 tokens, bullets only."
    "\nAppliances:\n"
)

# Generation function
def generate_recommendation(appliance_info: str) -> str:
    prompt = prompt_prefix + appliance_info + "\n\nRecommendations:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # Greedy decoding: with do_sample=False the temperature setting is
        # ignored (temperature=0.0 only triggers a warning), so it is dropped.
        outputs = model.generate(
            **inputs,
            max_new_tokens=120,
            do_sample=False,
        )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Return only the text generated after the final "Recommendations:" marker.
    return text.split("Recommendations:")[-1].strip()
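
# `gradio` is imported above but no UI is wired up in this snippet. Below is a
# minimal sketch of how generate_recommendation could be exposed through a
# Gradio interface; the component labels, placeholder text, and launch settings
# are illustrative assumptions, not taken from the original.
demo = gr.Interface(
    fn=generate_recommendation,
    inputs=gr.Textbox(
        lines=6,
        label="Appliances",
        placeholder="e.g. Fridge, 150W, 24h/day, 7 days/week",
    ),
    outputs=gr.Textbox(label="Energy-saving recommendations"),
    title="Energy-Saving Advisor",
)

if __name__ == "__main__":
    demo.launch()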