Futuresony committed on
Commit a294ce4 · verified · 1 Parent(s): c2709f5

Update app.py

Files changed (1)
  1. app.py +25 -34
app.py CHANGED
@@ -3,62 +3,53 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import PeftModel
 import torch
 
-# Define the base and LoRA model IDs
-base_model_id = "unsloth/gemma-2-9b-bnb-4bit"
+# Use a CPU-compatible base model (replace this with your actual full-precision model)
+base_model_id = "unsloth/gemma-2b"  # Replace with real CPU-compatible model
 lora_model_id = "Futuresony/future_12_10_2024"
 
-# Load the base model on CPU with float16
+# Load the base model on CPU
 base_model = AutoModelForCausalLM.from_pretrained(
     base_model_id,
-    torch_dtype=torch.float16,
-    device_map="cpu",  # Load the model on CPU, no GPU
+    torch_dtype=torch.float32,  # Use float32 for CPU compatibility
+    device_map="cpu"
 )
 
 # Load the PEFT LoRA model
 model = PeftModel.from_pretrained(base_model, lora_model_id)
 
-# Tokenizer for the model
+# Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(base_model_id)
 
-# Function to respond to the user's input
+# Chat function
 def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
-    # Prepare the message history for chat completion
     messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
+    for user_msg, bot_msg in history:
+        if user_msg:
+            messages.append({"role": "user", "content": user_msg})
+        if bot_msg:
+            messages.append({"role": "assistant", "content": bot_msg})
     messages.append({"role": "user", "content": message})
 
-    # Generate a response
-    response = ""
-    for message in model.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
+    # Generate response (simulated loop for streaming output)
+    inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cpu")
+    outputs = model.generate(
+        inputs,
+        max_new_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
+        do_sample=True,
+    )
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    yield response
 
-# Gradio interface setup
+# Gradio UI
 demo = gr.ChatInterface(
-    respond,
+    fn=respond,
     additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+        gr.Textbox(value="You are a friendly chatbot.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
     ],
 )
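
A note on the base-model swap above: a LoRA adapter is tied to the architecture and layer shapes of the base model it was trained on, so an adapter trained against gemma-2-9b will generally fail to load onto gemma-2b with shape-mismatch errors. The adapter records its training base in its adapter_config.json; a quick sanity check with peft's standard config loader (a sketch, not part of this commit):

# Sketch: verify the LoRA adapter matches the base model being loaded.
# PeftConfig is peft's standard config loader; this check is not in the commit.
from peft import PeftConfig

peft_config = PeftConfig.from_pretrained(lora_model_id)
print(peft_config.base_model_name_or_path)  # should name the same model as base_model_id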
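On the dtype choice: float32 weights cost 4 bytes per parameter, so a roughly 2.5B-parameter model like gemma-2b needs on the order of 10 GB of RAM before activations. If the local PyTorch build handles bfloat16 inference on CPU (an assumption worth testing), halving that is a one-line change:

# Sketch: load in bfloat16 to halve CPU memory, assuming the local PyTorch
# build supports bfloat16 CPU inference for this model. Not part of the commit.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    torch_dtype=torch.bfloat16,
    device_map="cpu",
)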
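As committed, respond() decodes outputs[0], which contains the prompt ids followed by the new tokens, so the reply shown in the chat will echo the entire templated conversation. Slicing off the prompt length fixes that; note also that apply_chat_template usually wants add_generation_prompt=True so the model continues as the assistant rather than extending the prompt. A sketch of the generation lines under those assumptions:

# Sketch: decode only the newly generated tokens, not the prompt.
# Drop-in replacement for the generation block inside respond(); not in the commit.
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to("cpu")
outputs = model.generate(
    inputs,
    max_new_tokens=max_tokens,
    temperature=temperature,
    top_p=top_p,
    do_sample=True,
)
response = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)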
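Finally, the "simulated loop for streaming output" comment is accurate: the function yields exactly once, so the UI shows nothing until generation finishes. transformers' TextIteratorStreamer gives true incremental output by running generate() on a background thread. A self-contained sketch of how respond() could stream, assuming the model and tokenizer objects above (and noting that Gemma chat templates may reject the "system" role, in which case the system message has to be folded into the first user turn):

# Sketch: true token-by-token streaming for respond(); not part of this commit.
from threading import Thread
from transformers import TextIteratorStreamer

def respond_streaming(message, history, system_message, max_tokens, temperature, top_p):
    messages = [{"role": "system", "content": system_message}]
    for user_msg, bot_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    inputs = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to("cpu")

    # skip_prompt=True keeps the echoed prompt out of the stream.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    Thread(
        target=model.generate,
        kwargs=dict(
            inputs=inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            streamer=streamer,
        ),
    ).start()

    # Yield the growing response as tokens arrive, which is what
    # gr.ChatInterface expects from a generator callback.
    response = ""
    for chunk in streamer:
        response += chunk
        yield response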