Spaces:

Futuresony
/

FuturesonyAi

Runtime error

App Files Community

Futuresony committed on Dec 16, 2024

Commit

9e566d1

verified ·

1 Parent(s): acdea71

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -56

app.py CHANGED Viewed

@@ -7,23 +7,36 @@ from peft import PeftModel  # For loading adapter files
 BASE_MODEL_PATH = "unsloth/Llama-3.2-3B-Instruct"  # Replace with your base model path
 ADAPTER_PATH = "Futuresony/future_ai_12_10_2024.gguf/adapter"  # Your Hugging Face repo
-# Function to clean rope_scaling in model config
 def clean_rope_scaling(config):
     if "rope_scaling" in config:
-        valid_rope_scaling = {"type": "linear", "factor": config["rope_scaling"].get("factor", 1.0)}
-        config["rope_scaling"] = valid_rope_scaling
     return config
 # Load base model and tokenizer
 print("Loading base model and tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH)
-# Load and clean the model config
 config = LlamaConfig.from_pretrained(BASE_MODEL_PATH)
-clean_config = clean_rope_scaling(config.to_dict())
-# Load model with cleaned config
-model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_PATH, config=clean_config, torch_dtype=torch.float16, device_map="auto")
 # Load adapter using PEFT
 print("Loading adapter...")
@@ -32,51 +45,4 @@ model = PeftModel.from_pretrained(model, ADAPTER_PATH)
 # Set model to evaluation mode
 model.eval()
-# Function to generate responses
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-    # Prepare input
-    input_text = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
-    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
-    # Generate response
-    generation_config = GenerationConfig(
-        max_new_tokens=max_tokens,
-        temperature=temperature,
-        top_p=top_p,
-        do_sample=True,
-    )
-    output_ids = model.generate(**inputs, generation_config=generation_config)
-    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-    return response.split("assistant:")[-1].strip()
-# Gradio Interface
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
-    ],
-)
-if __name__ == "__main__":
-    demo.launch()

 BASE_MODEL_PATH = "unsloth/Llama-3.2-3B-Instruct"  # Replace with your base model path
 ADAPTER_PATH = "Futuresony/future_ai_12_10_2024.gguf/adapter"  # Your Hugging Face repo
+# Function to clean invalid rope_scaling fields in model config
 def clean_rope_scaling(config):
     if "rope_scaling" in config:
+        rope_scaling = config["rope_scaling"]
+        # Retain only "type" and "factor" fields
+        config["rope_scaling"] = {
+            "type": rope_scaling.get("rope_type", "linear"),
+            "factor": rope_scaling.get("factor", 1.0),
+        }
     return config
 # Load base model and tokenizer
 print("Loading base model and tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH)
+# Load and clean model configuration
 config = LlamaConfig.from_pretrained(BASE_MODEL_PATH)
+cleaned_config_dict = clean_rope_scaling(config.to_dict())
+# Reconstruct the cleaned LlamaConfig object
+config = LlamaConfig(**cleaned_config_dict)
+# Load model with cleaned configuration
+print("Loading model...")
+model = AutoModelForCausalLM.from_pretrained(
+    BASE_MODEL_PATH,
+    config=config,
+    torch_dtype=torch.float16,
+    device_map="auto"
+)
 # Load adapter using PEFT
 print("Loading adapter...")
 # Set model to evaluation mode
 model.eval()
+print("Model and adapter loaded successfully!")