Spaces: Running on Zero
Revert to old model temporarily
app.py CHANGED
@@ -18,8 +18,8 @@ tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B", use_fast=Tr
 vocab_size = len(tokenizer)
 eos_token_id = tokenizer.eos_token_id
 mask_token_id = tokenizer.encode('MASK', add_special_tokens=False)[0]
-assistant_marker_ids = tokenizer.encode("<|start_header_id|>assistant<|end_header_id|>", add_special_tokens=False)
-
+# assistant_marker_ids = tokenizer.encode("<|start_header_id|>assistant<|end_header_id|>", add_special_tokens=False)
+assistant_marker_ids = tokenizer.encode("Assistant:", add_special_tokens=False)
 # def load_model():
 #     ckpt_path = hf_hub_download(
 #         repo_id="ruurd/tini_bi_m",
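Note on this hunk: the Llama-3 header-token marker is replaced by a plain-text "Assistant:" marker, matching the reverted prompt format further down. The diff does not show how assistant_marker_ids is consumed, so the following is only a minimal sketch, assuming it is used to locate where the assistant reply begins in a generated token sequence; find_reply_start is a hypothetical helper, not part of app.py.

# Hypothetical helper (not in app.py): scan a token-id list for the marker
# subsequence and return the index just past it, i.e. where the reply starts.
def find_reply_start(token_ids, marker_ids):
    n = len(marker_ids)
    for i in range(len(token_ids) - n + 1):
        if token_ids[i:i + n] == marker_ids:
            return i + n
    return 0  # marker not found: treat the whole sequence as the reply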
@@ -39,7 +39,7 @@ def load_model():
         repo_id="ruurd/tini_model",
         filename="diffusion-model.pth",
         token=os.getenv("HF_TOKEN"),
-
+        revision="1ffb916dd34f442f87cf06dda74b96f86eaf1d15",
     )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
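The added revision argument is what makes this revert stick: without it, hf_hub_download resolves the model repo's default branch, so any later push to ruurd/tini_model would silently change which weights the Space loads. A self-contained sketch of the pinned download, using the same arguments as the hunk above (revision accepts a commit hash, tag, or branch name):

import os
from huggingface_hub import hf_hub_download

# Resolve the exact checkpoint named in the diff; pinning to a commit hash
# guarantees the same file is fetched even if the model repo later moves on.
ckpt_path = hf_hub_download(
    repo_id="ruurd/tini_model",
    filename="diffusion-model.pth",
    token=os.getenv("HF_TOKEN"),
    revision="1ffb916dd34f442f87cf06dda74b96f86eaf1d15",
)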
@@ -195,14 +195,18 @@ def generate_diffusion_text(input_ids, top_p, top_k):
     conf = probs[range(len(sampled)), sampled].cpu().numpy()
     return sampled, conf

+# def format_chat_prompt(question):
+#     return (
+#         "<|begin_of_text|>\n"
+#         "<|start_header_id|>system<|end_header_id|>\n"
+#         "You are a helpful assistant.\n"
+#         "<|start_header_id|>user<|end_header_id|>\n"
+#         f"{question}\n"
+#         "<|start_header_id|>assistant<|end_header_id|>\n"
+# )
 def format_chat_prompt(question):
-    return (
-        "<|begin_of_text|>\n"
-        "<|start_header_id|>system<|end_header_id|>\n"
-        "You are a helpful assistant.\n"
-        "<|start_header_id|>user<|end_header_id|>\n"
-        f"{question}\n"
-        "<|start_header_id|>assistant<|end_header_id|>\n"
+    return (
+        f"User:{question}\nAssistant:"
     )
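The reverted prompt drops the Llama-3 special header tokens in favor of a plain User:/Assistant: transcript, which is also why assistant_marker_ids now encodes "Assistant:" in the first hunk. As committed, the function produces for example:

def format_chat_prompt(question):
    # Reverted plain-text format, copied from the hunk above.
    return (
        f"User:{question}\nAssistant:"
    )

print(repr(format_chat_prompt("What is 2+2?")))
# prints: 'User:What is 2+2?\nAssistant:'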