Spaces: Running on Zero
Revert to old model temporarily
app.py CHANGED
@@ -18,8 +18,8 @@ tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B", use_fast=Tr
 vocab_size = len(tokenizer)
 eos_token_id = tokenizer.eos_token_id
 mask_token_id = tokenizer.encode('MASK', add_special_tokens=False)[0]
-assistant_marker_ids = tokenizer.encode("<|start_header_id|>assistant<|end_header_id|>", add_special_tokens=False)
-
+# assistant_marker_ids = tokenizer.encode("<|start_header_id|>assistant<|end_header_id|>", add_special_tokens=False)
+assistant_marker_ids = tokenizer.encode("Assistant:", add_special_tokens=False)
 # def load_model():
 #     ckpt_path = hf_hub_download(
 #         repo_id="ruurd/tini_bi_m",
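Note on this hunk: the Llama-3 header-token marker is replaced by a plain-text "Assistant:" marker, matching the reverted prompt format further down. The diff does not show how assistant_marker_ids is consumed, so the following is only a minimal sketch, assuming it is used to locate where the assistant reply begins in a generated token sequence; find_reply_start is a hypothetical helper, not part of app.py.

# Hypothetical helper (not in app.py): scan a token-id list for the marker
# subsequence and return the index just past it, i.e. where the reply starts.
def find_reply_start(token_ids, marker_ids):
    n = len(marker_ids)
    for i in range(len(token_ids) - n + 1):
        if token_ids[i:i + n] == marker_ids:
            return i + n
    return 0  # marker not found: treat the whole sequence as the reply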
@@ -39,7 +39,7 @@ def load_model():
         repo_id="ruurd/tini_model",
         filename="diffusion-model.pth",
         token=os.getenv("HF_TOKEN"),
-
+        revision="1ffb916dd34f442f87cf06dda74b96f86eaf1d15",
     )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
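The added revision argument is what makes this revert stick: without it, hf_hub_download resolves the model repo's default branch, so any later push to ruurd/tini_model would silently change which weights the Space loads. A self-contained sketch of the pinned download, using the same arguments as the hunk above (revision accepts a commit hash, tag, or branch name):

import os
from huggingface_hub import hf_hub_download

# Resolve the exact checkpoint named in the diff; pinning to a commit hash
# guarantees the same file is fetched even if the model repo later moves on.
ckpt_path = hf_hub_download(
    repo_id="ruurd/tini_model",
    filename="diffusion-model.pth",
    token=os.getenv("HF_TOKEN"),
    revision="1ffb916dd34f442f87cf06dda74b96f86eaf1d15",
)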
@@ -195,14 +195,18 @@ def generate_diffusion_text(input_ids, top_p, top_k):
     conf = probs[range(len(sampled)), sampled].cpu().numpy()
     return sampled, conf

+# def format_chat_prompt(question):
+#     return (
+#         "<|begin_of_text|>\n"
+#         "<|start_header_id|>system<|end_header_id|>\n"
+#         "You are a helpful assistant.\n"
+#         "<|start_header_id|>user<|end_header_id|>\n"
+#         f"{question}\n"
+#         "<|start_header_id|>assistant<|end_header_id|>\n"
+# )
 def format_chat_prompt(question):
-    return (
-        "<|begin_of_text|>\n"
-        "<|start_header_id|>system<|end_header_id|>\n"
-        "You are a helpful assistant.\n"
-        "<|start_header_id|>user<|end_header_id|>\n"
-        f"{question}\n"
-        "<|start_header_id|>assistant<|end_header_id|>\n"
+    return (
+        f"User:{question}\nAssistant:"
     )
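The reverted prompt drops the Llama-3 special header tokens in favor of a plain User:/Assistant: transcript, which is also why assistant_marker_ids now encodes "Assistant:" in the first hunk. As committed, the function produces for example:

def format_chat_prompt(question):
    # Reverted plain-text format, copied from the hunk above.
    return (
        f"User:{question}\nAssistant:"
    )

print(repr(format_chat_prompt("What is 2+2?")))
# prints: 'User:What is 2+2?\nAssistant:'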