Wh1plashR committed
Commit fa550dd · verified · 1 Parent(s): 293845a

Update app.py

Files changed (1)
app.py +18 -12
app.py CHANGED
@@ -2,33 +2,39 @@ import os
 import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from huggingface_hub import snapshot_download
+from huggingface_hub import snapshot_download, hf_hub_download
 
-# ── 1) pull down the entire repo (config.json, tokenizer files, GGUF, etc.) ─────
-repo_id = "Qwen/Qwen2.5-0.5B-Instruct-GGUF"
-local_dir = snapshot_download(repo_id=repo_id)
+# ── 1) download the “full” Instruct repo (config.json, tokenizer files, etc.) ────
+instruct_repo = "Qwen/Qwen2.5-0.5B-Instruct"
+local_dir = snapshot_download(repo_id=instruct_repo)
 
-# the GGUF filename inside that folder (must match exactly)
+# ── 2) download the GGUF weights into that same folder ───────────────────────────
 gguf_filename = "qwen2.5-0.5b-instruct-q5_k_m.gguf"
+hf_hub_download(
+    repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
+    filename=gguf_filename,
+    local_dir=local_dir,
+    local_dir_use_symlinks=False
+)
 
 # sanity check
 gguf_path = os.path.join(local_dir, gguf_filename)
 assert os.path.isfile(gguf_path), f"GGUF not found at {gguf_path}"
 
-# ── 2) load tokenizer from the local snapshot ────────────────────────────────────
+# ── 3) load tokenizer from the combined folder ──────────────────────────────────
 tokenizer = AutoTokenizer.from_pretrained(
     local_dir,
-    trust_remote_code=True,  # allow any custom code in the repo
+    trust_remote_code=True
 )
 
-# ── 3) load the GGUF‑quantized model from that same folder ───────────────────────
+# ── 4) load the causal‑LM model, pointing at the GGUF file ──────────────────────
 model = AutoModelForCausalLM.from_pretrained(
     local_dir,
-    gguf_file=gguf_filename,  # relative name of the file in local_dir
+    gguf_file=gguf_filename,  # relative to local_dir
     device_map="auto",
-    trust_remote_code=True,
+    trust_remote_code=True
 )
-model = torch.compile(model)  # Torch 2.0+ speed‑up
+model = torch.compile(model)  # PyTorch 2.x compile for ~20–30% speedup
 
 # ── Prompt template ─────────────────────────────────────────────────────────────
 prompt_prefix = """
@@ -62,7 +68,7 @@ iface = gr.Interface(
     inputs=gr.Textbox(lines=8, placeholder="e.g. Refrigerator: 150 W, 8 h/day, 7 days/week\n..."),
     outputs="text",
     title="Energy‑Saving Tips (Qwen2.5‑0.5B‑Instruct‑GGUF)",
-    description="Provide your per‑appliance summary to get targeted, gguf‑powered energy‑saving recommendations."
+    description="Provide your appliance usage summary to get targeted, GGUF‑powered energy‑saving recommendations."
 )
 
 if __name__ == "__main__":
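
For orientation: the diff elides the middle of app.py (old lines 35-61), which holds the prompt template body and the function that actually runs generation over the tokenizer/model pair loaded above. Below is a minimal sketch of what such a function typically looks like for a Qwen2.5-Instruct model; the name generate_tips and all decoding parameters are illustrative assumptions, not code from this commit.

import torch  # sketch assumes app.py's globals: tokenizer, model, prompt_prefix

def generate_tips(appliance_info: str) -> str:
    # Hypothetical sketch, not the repo's actual function.
    # Qwen2.5-Instruct expects its chat template; apply_chat_template
    # turns the message list into model-ready input ids.
    messages = [{"role": "user", "content": prompt_prefix + appliance_info}]
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_new_tokens=256,  # assumed budget for a tip list
            do_sample=True,
            temperature=0.7,     # assumed decoding settings
        )
    # Drop the echoed prompt tokens; decode only the newly generated reply.
    return tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)

Because model is wrapped by torch.compile, attribute access such as model.device and model.generate is forwarded to the underlying PreTrainedModel, so a sketch like this should behave the same with or without the compile step.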