import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, LlamaConfig
from peft import PeftModel  # For loading adapter files

# Paths to the base model and adapter
BASE_MODEL_PATH = "unsloth/Llama-3.2-3B-Instruct"  # Replace with your base model path
ADAPTER_PATH = "Futuresony/future_ai_12_10_2024.gguf"  # Your Hugging Face repo
ADAPTER_SUBFOLDER = "adapter"  # Adapter files live in this subfolder of the repo

# Llama 3.2 ships a "llama3" rope_scaling block with extra fields that older
# transformers releases reject; keep only the legacy "type"/"factor" keys.
def clean_rope_scaling(config):
    if config.get("rope_scaling"):  # guard against a missing or None entry
        rope_scaling = config["rope_scaling"]
        # Retain only the "type" and "factor" fields
        config["rope_scaling"] = {
            "type": rope_scaling.get("rope_type", "linear"),
            "factor": rope_scaling.get("factor", 1.0),
        }
    return config
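
# For reference (values hedged from the public Llama 3.2 release, not read
# from this Space), the raw config carries something like:
#   "rope_scaling": {"rope_type": "llama3", "factor": 32.0,
#                    "low_freq_factor": 1.0, "high_freq_factor": 4.0,
#                    "original_max_position_embeddings": 8192}
# which clean_rope_scaling reduces to:
#   "rope_scaling": {"type": "llama3", "factor": 32.0}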

# Load base model and tokenizer
print("Loading base model and tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH)

# Load and clean the model configuration
config = LlamaConfig.from_pretrained(BASE_MODEL_PATH)
cleaned_config_dict = clean_rope_scaling(config.to_dict())

# Reconstruct the cleaned LlamaConfig object
config = LlamaConfig(**cleaned_config_dict)

# Load the model with the cleaned configuration
print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_PATH,
    config=config,
    torch_dtype=torch.float16,
    device_map="auto",
)
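
# Note: device_map="auto" requires the `accelerate` package, and float16
# assumes a GPU runtime; on a CPU-only Space, float32 is the safer default.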

# Load the adapter with PEFT. Hub repo ids must be "namespace/name", so the
# subfolder inside the repo is passed separately rather than appended to the id.
print("Loading adapter...")
model = PeftModel.from_pretrained(model, ADAPTER_PATH, subfolder=ADAPTER_SUBFOLDER)
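
# Optional: if this is a LoRA-style adapter, merging it into the base weights
# removes the PEFT indirection at inference time (a judgment call; it also
# makes swapping adapters later impossible):
# model = model.merge_and_unload()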

# Set model to evaluation mode
model.eval()
print("Model and adapter loaded successfully!")