Futuresony committed
Commit 9d4528b · verified · 1 Parent(s): 21f236d

Update app.py

Files changed (1)
app.py  +53 -52
app.py CHANGED
@@ -1,42 +1,34 @@
+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import PeftModel
-import gradio as gr
 
-# --------------------
-# Load Base Model and LoRA Adapter
-# --------------------
+# Function to load the base model and LoRA adapter
 def load_model_and_adapter():
-    base_model_name = "unsloth/Llama-3.2-3B-Instruct"  # Replace with your base model name
-    adapter_repo = "Futuresony/future_ai_12_10_2024"  # Your Hugging Face LoRA repo
-
-    # Load tokenizer
+    base_model_name = "Futuresony/future_ai_12_10_2024.gguf"  # Replace with your model path or ID
+    adapter_path = "./adapter"  # Adjust this path to your LoRA adapter files location
+
+    # Load configuration and tokenizer
+    config = AutoConfig.from_pretrained(base_model_name)
     tokenizer = AutoTokenizer.from_pretrained(base_model_name)
-
-    # Fix rope_scaling in model configuration
-    config = AutoModelForCausalLM.config_class.from_pretrained(base_model_name)
-    if hasattr(config, "rope_scaling"):
-        config.rope_scaling = {"type": "dynamic", "factor": 32.0}  # Override with valid keys
-
-    # Load base model with fixed config
+
+    # Load the base model
     base_model = AutoModelForCausalLM.from_pretrained(
         base_model_name,
-        config=config,
-        torch_dtype=torch.float16,  # Use float16 for efficiency if GPU is available
-        device_map="auto"  # Automatically map to GPU or CPU
+        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+        device_map="auto",
     )
-
+
     # Load LoRA adapter
-    model = PeftModel.from_pretrained(base_model, adapter_repo)
-    model.eval()  # Set to evaluation mode
+    model = PeftModel.from_pretrained(base_model, adapter_path)
+
+    # Ensure the model is ready
+    model.eval()
+
     return tokenizer, model
 
-# Load the model and tokenizer once
-tokenizer, model = load_model_and_adapter()
 
-# --------------------
-# Generate Response Function
-# --------------------
+# Function to handle the conversation
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -45,49 +37,58 @@ def respond(
     temperature,
     top_p,
 ):
+    # Load tokenizer and model
+    global tokenizer, model
+    tokenizer, model = load_model_and_adapter()
+
+    # Build the input prompt
     messages = [{"role": "system", "content": system_message}]
-
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
+    for user_input, assistant_response in history:
+        if user_input:
+            messages.append({"role": "user", "content": user_input})
+        if assistant_response:
+            messages.append({"role": "assistant", "content": assistant_response})
     messages.append({"role": "user", "content": message})
 
-    # Prepare input prompt for generation
-    prompt = "\n".join([f"{m['role']}: {m['content']}" for m in messages])
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    # Tokenize input
+    inputs = tokenizer.apply_chat_template(
+        messages,
+        add_generation_prompt=True,
+        return_tensors="pt",
+    ).to(model.device)
 
     # Generate response
-    outputs = model.generate(
-        **inputs,
-        max_length=max_tokens,
+    output = model.generate(
+        inputs,
+        max_new_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
         pad_token_id=tokenizer.eos_token_id,
-        eos_token_id=tokenizer.eos_token_id
     )
-
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    response = response.split("assistant:")[-1].strip()  # Clean response
+
+    # Decode response
+    response = tokenizer.decode(output[0], skip_special_tokens=True)
+
     return response
 
-# --------------------
-# Gradio Interface
-# --------------------
+
+# Gradio interface
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value="You are a helpful assistant.", label="System message"),
+        gr.Textbox(value="You are a helpful AI assistant.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-p (nucleus sampling)",
+        ),
     ],
 )
 
-# --------------------
-# Launch the Interface
-# --------------------
 if __name__ == "__main__":
     demo.launch()
+
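Note on the updated flow: as committed, respond() calls load_model_and_adapter() on every chat turn, and tokenizer.decode(output[0], ...) decodes the prompt together with the completion, so the reply echoes the conversation back. The sketch below is not part of this commit. It assumes the repo name and ./adapter path from the diff resolve to a standard Transformers checkpoint plus PEFT LoRA weights, and generate_reply is a hypothetical helper name. It loads the model once at import time, enables sampling so temperature/top_p take effect, and decodes only the newly generated tokens.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

BASE_MODEL = "Futuresony/future_ai_12_10_2024.gguf"  # taken from the diff above (assumed loadable)
ADAPTER_PATH = "./adapter"                            # taken from the diff above

# Load the tokenizer, base model, and LoRA adapter once at import time,
# instead of once per chat message inside respond().
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)
model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
model.eval()


def generate_reply(messages, max_new_tokens=512, temperature=0.7, top_p=0.95):
    # Build the prompt with the tokenizer's chat template.
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_new_tokens=max_new_tokens,
            do_sample=True,  # required for temperature/top_p to have an effect
            temperature=temperature,
            top_p=top_p,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the tokens generated after the prompt, so the reply
    # does not echo the whole conversation back to the user.
    new_tokens = output[0][input_ids.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

With this split, respond() only needs to assemble the messages list from history and call generate_reply(); gr.ChatInterface passes the additional_inputs values positionally after (message, history), which is how the system message, max tokens, temperature, and top-p widgets above reach the function.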