Pinkstack committed on
Commit
0839086
·
verified ·
1 Parent(s): 0e6314f

fixed issues and made fine-tuning easier

Browse files

Modified the model max length and added a chat template, even though the model is not intended for chat.

Files changed (1) hide show
  1. tokenizer_config.json +3 -2
tokenizer_config.json CHANGED
@@ -179,10 +179,11 @@
179
  }
180
  },
181
  "bos_token": "<|endoftext|>",
 
182
  "clean_up_tokenization_spaces": false,
183
  "eos_token": "<|endoftext|>",
184
- "extra_special_tokens": {},
185
- "model_max_length": 1000000000000000019884624838656,
186
  "pad_token": "<|pad|>",
187
  "padding_side": "right",
188
  "tokenizer_class": "GPT2Tokenizer",
 
179
  }
180
  },
181
  "bos_token": "<|endoftext|>",
182
+ "chat_template": "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'system' %}{{ '<|system|>' + message['content'] + '' }}{% elif message['role'] == 'user' %}{{ '<|user|>' + message['content'] + '' }}{% elif message['role'] == 'assistant' %}{% if not loop.last %}{{ '<|assistant|>' + message['content'] + eos_token + '' }}{% else %}{{ '<|assistant|>' + message['content'] + eos_token }}{% endif %}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|assistant|>' }}{% endif %}{% endfor %}",
183
  "clean_up_tokenization_spaces": false,
184
  "eos_token": "<|endoftext|>",
185
+ "extra_special_tokens": {},
186
+ "model_max_length": 32768,
187
  "pad_token": "<|pad|>",
188
  "padding_side": "right",
189
  "tokenizer_class": "GPT2Tokenizer",