from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
)
from peft import PeftModel, PeftConfig
import torch

# Put the whole model on the current GPU if one is available; otherwise fall back to CPU.
d_map = {"": torch.cuda.current_device()} if torch.cuda.is_available() else None
local_model_path = "outputs/checkpoint-100"  # Path to the saved LoRA adapter checkpoint
# Load the base model referenced by the adapter's PEFT config.
config = PeftConfig.from_pretrained(local_model_path)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    # load_in_4bit=True,  # optional: load the base weights 4-bit quantized
    device_map=d_map,
    ignore_mismatched_sizes=True,
    # from_tf=True,  # optional: load from a TensorFlow checkpoint
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Apply the LoRA adapter to the base model, then merge the adapter
# weights into the base weights and drop the PEFT wrapper.
model = PeftModel.from_pretrained(model, local_model_path)
merged = model.merge_and_unload()

# Save the merged model and tokenizer as a standalone checkpoint.
merged.save_pretrained("outputs/merged")
tokenizer.save_pretrained("outputs/merged")
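
# Optional sanity check (a minimal sketch, not part of the original script):
# reload the merged checkpoint from "outputs/merged" and run a short
# generation to confirm it works standalone. The prompt string below is a
# placeholder assumption.
reloaded = AutoModelForCausalLM.from_pretrained("outputs/merged", device_map=d_map)
reloaded_tokenizer = AutoTokenizer.from_pretrained("outputs/merged")

inputs = reloaded_tokenizer("Hello, my name is", return_tensors="pt").to(reloaded.device)
with torch.no_grad():
    output_ids = reloaded.generate(**inputs, max_new_tokens=20)
print(reloaded_tokenizer.decode(output_ids[0], skip_special_tokens=True))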