Futuresony committed
Commit 4c839c8 · verified · 1 Parent(s): 6aee846

Update app.py

Files changed (1): app.py (+27 -0)
app.py CHANGED
@@ -114,6 +114,33 @@ def load_llm_model(model_id, hf_token):
         print("Error: HF_TOKEN secret is not set. Cannot load Hugging Face model.")
         return None, None
 
+    try:
+        llm_tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
+
+        # Explicitly set the chat template for Gemma models.
+        # This template formats messages as <start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n{response}<end_of_turn>\n
+        # and appends <start_of_turn>model\n at the end when add_generation_prompt is set.
+        llm_tokenizer.chat_template = "{% for message in messages %}{% if message['role'] == 'user' %}{{ '<start_of_turn>user\n' + message['content'] + '<end_of_turn>\n' }}{% elif message['role'] == 'system' %}{{ '<start_of_turn>system\n' + message['content'] + '<end_of_turn>\n' }}{% elif message['role'] == 'tool' %}{{ '<start_of_turn>tool\n' + message['content'] + '<end_of_turn>\n' }}{% elif message['role'] == 'model' %}{{ '<start_of_turn>model\n' + message['content'] + '<end_of_turn>\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<start_of_turn>model\n' }}{% endif %}"
+
+
+        if llm_tokenizer.pad_token is None:
+            llm_tokenizer.pad_token = llm_tokenizer.eos_token
+
+        llm_model = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            token=hf_token,
+            device_map="auto",  # This will likely map to 'cpu'
+        )
+
+        print(f"Model {model_id} loaded in full precision.")
+        return llm_model, llm_tokenizer
+
+    except Exception as e:
+        print(f"Error loading model {model_id}: {e}")
+        print("Please ensure transformers, trl, peft, and accelerate are installed.")
+        print("Check your Hugging Face token.")
+        return None, None
+
     try:
         llm_tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
         if llm_tokenizer.pad_token is None:
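
For reference, the chat_template assigned in this commit is an ordinary Jinja template, so its output can be previewed without loading the model. The sketch below is not part of the commit and the example messages are made up; it renders the same template string with jinja2, the engine transformers also uses for tokenizer.apply_chat_template:

from jinja2 import Template

# Same template string the commit assigns to llm_tokenizer.chat_template.
CHAT_TEMPLATE = (
    "{% for message in messages %}"
    "{% if message['role'] == 'user' %}{{ '<start_of_turn>user\n' + message['content'] + '<end_of_turn>\n' }}"
    "{% elif message['role'] == 'system' %}{{ '<start_of_turn>system\n' + message['content'] + '<end_of_turn>\n' }}"
    "{% elif message['role'] == 'tool' %}{{ '<start_of_turn>tool\n' + message['content'] + '<end_of_turn>\n' }}"
    "{% elif message['role'] == 'model' %}{{ '<start_of_turn>model\n' + message['content'] + '<end_of_turn>\n' }}"
    "{% endif %}{% endfor %}"
    "{% if add_generation_prompt %}{{ '<start_of_turn>model\n' }}{% endif %}"
)

# Hypothetical two-turn conversation; note the assistant role is 'model',
# matching Gemma's convention and the branches in the template above.
messages = [
    {"role": "user", "content": "Hello, who are you?"},
    {"role": "model", "content": "I am a helpful assistant."},
    {"role": "user", "content": "Thanks!"},
]

prompt = Template(CHAT_TEMPLATE).render(messages=messages, add_generation_prompt=True)
print(prompt)
# <start_of_turn>user
# Hello, who are you?<end_of_turn>
# <start_of_turn>model
# I am a helpful assistant.<end_of_turn>
# <start_of_turn>user
# Thanks!<end_of_turn>
# <start_of_turn>model

The trailing <start_of_turn>model line is the generation prompt that cues the model to produce its reply. With the real tokenizer, the equivalent call is llm_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True).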