"""Merge LoRA adapter weights from a PEFT checkpoint into the base model.

Loads the base causal-LM named in the adapter's PeftConfig, applies the
LoRA adapter from ``outputs/checkpoint-100``, merges the adapter weights
into the base weights, and saves the standalone merged model (plus the
tokenizer) to ``outputs/merged``.
"""

import torch
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Place all weights on the current CUDA device when one is available;
# fall back to transformers' default (CPU) placement otherwise.
d_map = {"": torch.cuda.current_device()} if torch.cuda.is_available() else None

# Path to the LoRA adapter checkpoint produced by training.
local_model_path = "outputs/checkpoint-100"

# The adapter config records which base model it was trained on.
config = PeftConfig.from_pretrained(local_model_path)

# Load the base model the adapter was trained against.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    # load_in_4bit=True,  # NOTE(review): merging requires full-precision weights
    device_map=d_map,
    ignore_mismatched_sizes=True,
    # from_tf=True,
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Attach the LoRA adapter to the base model, then fold the adapter
# deltas into the base weights so the result is a plain HF model.
model = PeftModel.from_pretrained(model, local_model_path)
merged = model.merge_and_unload()

merged.save_pretrained("outputs/merged")
tokenizer.save_pretrained("outputs/merged")