Wh1plashR committed
Commit fa550dd · verified · 1 Parent(s): 293845a

Update app.py

Files changed (1)
app.py +18 -12
app.py CHANGED
@@ -2,33 +2,39 @@ import os
 import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from huggingface_hub import snapshot_download
+from huggingface_hub import snapshot_download, hf_hub_download
 
-# ── 1) pull down the entire repo (config.json, tokenizer files, GGUF, etc.) ─────
-repo_id = "Qwen/Qwen2.5-0.5B-Instruct-GGUF"
-local_dir = snapshot_download(repo_id=repo_id)
+# ── 1) download the “full” Instruct repo (config.json, tokenizer files, etc.) ────
+instruct_repo = "Qwen/Qwen2.5-0.5B-Instruct"
+local_dir = snapshot_download(repo_id=instruct_repo)
 
-# the GGUF filename inside that folder (must match exactly)
+# ── 2) download the GGUF weights into that same folder ───────────────────────────
 gguf_filename = "qwen2.5-0.5b-instruct-q5_k_m.gguf"
+hf_hub_download(
+    repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
+    filename=gguf_filename,
+    local_dir=local_dir,
+    local_dir_use_symlinks=False
+)
 
 # sanity check
 gguf_path = os.path.join(local_dir, gguf_filename)
 assert os.path.isfile(gguf_path), f"GGUF not found at {gguf_path}"
 
-# ── 2) load tokenizer from the local snapshot ────────────────────────────────────
+# ── 3) load tokenizer from the combined folder ──────────────────────────────────
 tokenizer = AutoTokenizer.from_pretrained(
     local_dir,
-    trust_remote_code=True,  # allow any custom code in the repo
+    trust_remote_code=True
 )
 
-# ── 3) load the GGUF‑quantized model from that same folder ───────────────────────
+# ── 4) load the causal‑LM model, pointing at the GGUF file ──────────────────────
 model = AutoModelForCausalLM.from_pretrained(
     local_dir,
-    gguf_file=gguf_filename,  # relative name of the file in local_dir
+    gguf_file=gguf_filename,  # relative to local_dir
     device_map="auto",
-    trust_remote_code=True,
+    trust_remote_code=True
 )
-model = torch.compile(model)  # Torch 2.0+ speed‑up
+model = torch.compile(model)  # PyTorch 2.x compile for ~20–30% speedup
 
 # ── Prompt template ─────────────────────────────────────────────────────────────
 prompt_prefix = """
@@ -62,7 +68,7 @@ iface = gr.Interface(
     inputs=gr.Textbox(lines=8, placeholder="e.g. Refrigerator: 150 W, 8 h/day, 7 days/week\n..."),
     outputs="text",
     title="Energy‑Saving Tips (Qwen2.5‑0.5B‑Instruct‑GGUF)",
-    description="Provide your per‑appliance summary to get targeted, gguf‑powered energy‑saving recommendations."
+    description="Provide your appliance usage summary to get targeted, GGUF‑powered energy‑saving recommendations."
 )
 
 if __name__ == "__main__":
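
For orientation: the diff elides the middle of app.py (old lines 35-61), which holds the prompt template body and the function that actually runs generation over the tokenizer/model pair loaded above. Below is a minimal sketch of what such a function typically looks like for a Qwen2.5-Instruct model; the name generate_tips and all decoding parameters are illustrative assumptions, not code from this commit.

import torch  # sketch assumes app.py's globals: tokenizer, model, prompt_prefix

def generate_tips(appliance_info: str) -> str:
    # Hypothetical sketch, not the repo's actual function.
    # Qwen2.5-Instruct expects its chat template; apply_chat_template
    # turns the message list into model-ready input ids.
    messages = [{"role": "user", "content": prompt_prefix + appliance_info}]
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_new_tokens=256,  # assumed budget for a tip list
            do_sample=True,
            temperature=0.7,     # assumed decoding settings
        )
    # Drop the echoed prompt tokens; decode only the newly generated reply.
    return tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)

Because model is wrapped by torch.compile, attribute access such as model.device and model.generate is forwarded to the underlying PreTrainedModel, so a sketch like this should behave the same with or without the compile step.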