Futuresony committed on
Commit 9e566d1 · verified · 1 Parent(s): acdea71

Update app.py

Files changed (1)
  1. app.py +22 -56
app.py CHANGED
@@ -7,23 +7,36 @@ from peft import PeftModel # For loading adapter files
 BASE_MODEL_PATH = "unsloth/Llama-3.2-3B-Instruct" # Replace with your base model path
 ADAPTER_PATH = "Futuresony/future_ai_12_10_2024.gguf/adapter" # Your Hugging Face repo

-# Function to clean rope_scaling in model config
+# Function to clean invalid rope_scaling fields in model config
 def clean_rope_scaling(config):
     if "rope_scaling" in config:
-        valid_rope_scaling = {"type": "linear", "factor": config["rope_scaling"].get("factor", 1.0)}
-        config["rope_scaling"] = valid_rope_scaling
+        rope_scaling = config["rope_scaling"]
+        # Retain only "type" and "factor" fields
+        config["rope_scaling"] = {
+            "type": rope_scaling.get("rope_type", "linear"),
+            "factor": rope_scaling.get("factor", 1.0),
+        }
     return config

 # Load base model and tokenizer
 print("Loading base model and tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH)

-# Load and clean the model config
+# Load and clean model configuration
 config = LlamaConfig.from_pretrained(BASE_MODEL_PATH)
-clean_config = clean_rope_scaling(config.to_dict())
-
-# Load model with cleaned config
-model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_PATH, config=clean_config, torch_dtype=torch.float16, device_map="auto")
+cleaned_config_dict = clean_rope_scaling(config.to_dict())
+
+# Reconstruct the cleaned LlamaConfig object
+config = LlamaConfig(**cleaned_config_dict)
+
+# Load model with cleaned configuration
+print("Loading model...")
+model = AutoModelForCausalLM.from_pretrained(
+    BASE_MODEL_PATH,
+    config=config,
+    torch_dtype=torch.float16,
+    device_map="auto"
+)

 # Load adapter using PEFT
 print("Loading adapter...")
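For context on the rope_scaling change above: Llama 3.2-style configs typically ship a rope_scaling block with extra fields (e.g. rope_type, low_freq_factor, high_freq_factor, original_max_position_embeddings), while some transformers releases validate rope_scaling as a plain two-field dict of "type" and "factor", which is what this cleaning step reduces it to. A minimal sketch of the new behavior, using illustrative values rather than the actual unsloth/Llama-3.2-3B-Instruct config:

```python
# Sketch of the updated clean_rope_scaling behavior (illustrative values,
# not the real model config).
def clean_rope_scaling(config):
    if "rope_scaling" in config:
        rope_scaling = config["rope_scaling"]
        # Keep only the two fields that older config validation expects
        config["rope_scaling"] = {
            "type": rope_scaling.get("rope_type", "linear"),
            "factor": rope_scaling.get("factor", 1.0),
        }
    return config

sample = {
    "rope_scaling": {
        "rope_type": "llama3",
        "factor": 32.0,
        "low_freq_factor": 1.0,
        "high_freq_factor": 4.0,
        "original_max_position_embeddings": 8192,
    }
}
print(clean_rope_scaling(sample)["rope_scaling"])
# {'type': 'llama3', 'factor': 32.0}
```

Note the difference from the old version: the removed code hard-coded "type": "linear", whereas the new code carries the original rope_type value through and only drops the extra fields.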
@@ -32,51 +45,4 @@ model = PeftModel.from_pretrained(model, ADAPTER_PATH)
 # Set model to evaluation mode
 model.eval()

-# Function to generate responses
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    # Prepare input
-    input_text = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
-    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
-
-    # Generate response
-    generation_config = GenerationConfig(
-        max_new_tokens=max_tokens,
-        temperature=temperature,
-        top_p=top_p,
-        do_sample=True,
-    )
-    output_ids = model.generate(**inputs, generation_config=generation_config)
-    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-
-    return response.split("assistant:")[-1].strip()
-
-# Gradio Interface
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
-    ],
-)
-
-if __name__ == "__main__":
-    demo.launch()
-
+print("Model and adapter loaded successfully!")
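Not part of this commit, but a common follow-up to the adapter-loading step above: PEFT's merge_and_unload() can fold the LoRA weights into the base model so inference no longer routes through adapter modules. A minimal sketch, assuming model is the PeftModel created in app.py:

```python
# Optional sketch (not in the commit): merge the LoRA adapter into the base
# weights for plain transformers inference. Assumes `model` is the PeftModel
# built above via PeftModel.from_pretrained(model, ADAPTER_PATH).
merged_model = model.merge_and_unload()
merged_model.eval()
```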
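Since the respond()/Gradio path was removed in this commit, a quick way to check that the base model plus adapter still generate is a small script in the same style as the deleted code. This is only a sketch: it assumes the tokenizer and model objects from app.py above are in scope, and the prompt text is illustrative.

```python
import torch
from transformers import GenerationConfig

# Illustrative smoke test; `tokenizer` and `model` come from app.py above.
prompt = "user: Hello, what can you help me with?\nassistant:"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

generation_config = GenerationConfig(
    max_new_tokens=128,
    temperature=0.7,
    top_p=0.95,
    do_sample=True,
)
with torch.no_grad():
    output_ids = model.generate(**inputs, generation_config=generation_config)

print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```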