Update app.py
app.py CHANGED
@@ -114,6 +114,33 @@ def load_llm_model(model_id, hf_token):
         print("Error: HF_TOKEN secret is not set. Cannot load Hugging Face model.")
         return None, None
 
+    try:
+        llm_tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
+
+        # Explicitly set the chat template for Gemma models.
+        # This template formats messages as <start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n{response}<end_of_turn>\n
+        # and adds <start_of_turn>model\n at the end as the generation prompt (<bos> is prepended by the Gemma tokenizer itself).
+        llm_tokenizer.chat_template = "{% for message in messages %}{% if message['role'] == 'user' %}{{ '<start_of_turn>user\n' + message['content'] + '<end_of_turn>\n' }}{% elif message['role'] == 'system' %}{{ '<start_of_turn>system\n' + message['content'] + '<end_of_turn>\n' }}{% elif message['role'] == 'tool' %}{{ '<start_of_turn>tool\n' + message['content'] + '<end_of_turn>\n' }}{% elif message['role'] == 'model' %}{{ '<start_of_turn>model\n' + message['content'] + '<end_of_turn>\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<start_of_turn>model\n' }}{% endif %}"
+
+
+        if llm_tokenizer.pad_token is None:
+            llm_tokenizer.pad_token = llm_tokenizer.eos_token
+
+        llm_model = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            token=hf_token,
+            device_map="auto",  # This will likely map to 'cpu'
+        )
+
+        print(f"Model {model_id} loaded in full precision.")
+        return llm_model, llm_tokenizer
+
+    except Exception as e:
+        print(f"Error loading model {model_id}: {e}")
+        print("Please ensure transformers, trl, peft, and accelerate are installed.")
+        print("Check your Hugging Face token.")
+        return None, None
+
     try:
         llm_tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
         if llm_tokenizer.pad_token is None:
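For reference, a minimal sketch of what this template renders to when applied via `apply_chat_template`. The checkpoint name below is an illustrative assumption (not taken from this commit), and only the user/model branches of the template are reproduced for brevity; since the template is overridden, any tokenizer would render the same string:

```python
from transformers import AutoTokenizer

# Assumed checkpoint for illustration. Gemma checkpoints are gated on the Hub,
# so loading this one requires an HF token with access.
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")

# Abbreviated version of the template set in the diff (user/model turns only).
tokenizer.chat_template = (
    "{% for message in messages %}"
    "{% if message['role'] == 'user' %}"
    "{{ '<start_of_turn>user\n' + message['content'] + '<end_of_turn>\n' }}"
    "{% elif message['role'] == 'model' %}"
    "{{ '<start_of_turn>model\n' + message['content'] + '<end_of_turn>\n' }}"
    "{% endif %}{% endfor %}"
    "{% if add_generation_prompt %}{{ '<start_of_turn>model\n' }}{% endif %}"
)

messages = [{"role": "user", "content": "What is the capital of France?"}]

# tokenize=False returns the rendered string instead of token IDs,
# so the turn markers are visible directly.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)
# <start_of_turn>user
# What is the capital of France?<end_of_turn>
# <start_of_turn>model
```

Rendering with `tokenize=False` like this is a quick way to verify the turn markers and the trailing `<start_of_turn>model\n` generation prompt before wiring the tokenizer into generation.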