import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, LlamaConfig
from peft import PeftModel # For loading adapter files
# Path to the base model and adapter
BASE_MODEL_PATH = "unsloth/Llama-3.2-3B-Instruct" # Replace with your base model path
ADAPTER_PATH = "Futuresony/future_ai_12_10_2024.gguf/adapter" # Your Hugging Face repo
# Function to clean invalid rope_scaling fields in model config
def clean_rope_scaling(config):
    rope_scaling = config.get("rope_scaling")
    if rope_scaling:
        # Retain only the "type" and "factor" fields
        config["rope_scaling"] = {
            "type": rope_scaling.get("rope_type", "linear"),
            "factor": rope_scaling.get("factor", 1.0),
        }
    return config
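# Illustrative example (an assumption about typical Llama 3-style configs, not
# taken from this repo): a config.json may carry extra rope_scaling keys such as
# "rope_type", "low_freq_factor", or "original_max_position_embeddings". With
# such a config, clean_rope_scaling({"rope_scaling": {"rope_type": "llama3",
# "factor": 32.0, "low_freq_factor": 1.0}}) returns
# {"rope_scaling": {"type": "llama3", "factor": 32.0}}, dropping the extra keys.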
# Load base model and tokenizer
print("Loading base model and tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH)
# Load and clean model configuration
config = LlamaConfig.from_pretrained(BASE_MODEL_PATH)
cleaned_config_dict = clean_rope_scaling(config.to_dict())
# Reconstruct the cleaned LlamaConfig object
config = LlamaConfig(**cleaned_config_dict)
# Load model with cleaned configuration
print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_PATH,
    config=config,
    torch_dtype=torch.float16,
    device_map="auto",
)
# Load adapter using PEFT
print("Loading adapter...")
model = PeftModel.from_pretrained(model, ADAPTER_PATH)
# Set model to evaluation mode
model.eval()
print("Model and adapter loaded successfully!")