Futuresony committed on
Commit
48d9d00
·
verified ·
1 Parent(s): ee40f18

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -42
app.py CHANGED
@@ -1,66 +1,66 @@
1
- import os
2
- import torch
3
  import gradio as gr
4
- from transformers import AutoTokenizer, AutoModelForCausalLM
5
  from peft import PeftModel
6
- from huggingface_hub import login
7
-
8
# Authenticate against the Hugging Face Hub. The token is read from the
# HF_TOKEN environment variable; start-up aborts when it is missing or empty.
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    raise RuntimeError("Missing HF_TOKEN in secrets.")
login(token=hf_token)

# Hub repository IDs: the base checkpoint and the LoRA adapter applied to it.
base_model_id = "unsloth/gemma-2-9b-bnb-4bit"
lora_model_id = "Futuresony/future_12_10_2024"

# The tokenizer and the base weights both come from the base checkpoint.
# device_map="auto" leaves device placement to the loader.
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id, torch_dtype=torch.float16, device_map="auto"
)

# Wrap the base model with the adapter and switch to evaluation mode, since
# this script only generates text.
model = PeftModel.from_pretrained(base_model, lora_model_id)
model.eval()
29
 
30
- # Chat function
31
def generate_response(message, history, system_message, max_tokens, temperature, top_p):
    """Generate one assistant reply for the Gradio chat interface.

    Args:
        message: The latest user message.
        history: Previous turns as ``(user_input, bot_response)`` pairs.
        system_message: Text placed at the top of the prompt.
        max_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature passed to ``model.generate``.
        top_p: Nucleus-sampling threshold passed to ``model.generate``.

    Returns:
        The assistant's reply as a stripped string.
    """
    # Render the conversation as a plain-text transcript that ends with an
    # open "Assistant:" turn for the model to complete.
    past_turns = "".join(
        f"User: {user_input}\nAssistant: {bot_response}\n"
        for user_input, bot_response in history
    )
    prompt = system_message + "\n\n" + past_turns + f"User: {message}\nAssistant:"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    outputs = model.generate(
        **inputs,
        max_new_tokens=int(max_tokens),  # UI sliders may deliver a float
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the tokens produced after the prompt. Splitting the full
    # decoded text on "Assistant:" breaks when the reply itself contains that
    # marker or when the model goes on to write further turns.
    completion = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )

    # The transcript format invites the model to continue with an invented
    # "User:" turn; keep only the text before the first one.
    return completion.split("\nUser:", 1)[0].strip()
50
-
51
# Chat UI. `generate_response` handles each message; the widgets listed in
# `additional_inputs` supply its system_message, max_tokens, temperature and
# top_p arguments, in that order.
demo = gr.ChatInterface(
    generate_response,
    title="LoRA Chat Assistant (Gemma-2)",
    description="Chat with your fine-tuned Gemma-2 LoRA model",
    additional_inputs=[
        gr.Textbox(label="System Message", value="You are a helpful assistant."),
        gr.Slider(minimum=50, maximum=1024, value=256, step=1, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=1.5, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()
66
-
 
 
 
1
  import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
  from peft import PeftModel
4
+ import torch
 
 
 
 
 
 
5
 
6
# Hub repository IDs: the base checkpoint and the LoRA adapter applied to it.
base_model_id = "unsloth/gemma-2-9b-bnb-4bit"
lora_model_id = "Futuresony/future_12_10_2024"

# Base weights are loaded in float16 and pinned to the CPU (no GPU is used).
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id, torch_dtype=torch.float16, device_map="cpu"
)

# Wrap the base model with the PEFT LoRA adapter.
model = PeftModel.from_pretrained(base_model, lora_model_id)

# The tokenizer is taken from the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
 
 
 
 
22
 
23
+ # Function to respond to the user's input
24
def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
    """Stream the assistant's reply for the Gradio chat interface.

    The module-level ``model`` is a locally loaded ``PeftModel``. It has no
    ``chat_completion`` method (that API belongs to
    ``huggingface_hub.InferenceClient``), so text is produced with
    ``model.generate`` and streamed through a ``TextIteratorStreamer``.

    Args:
        message: The latest user message.
        history: Previous turns as ``(user, assistant)`` pairs; empty entries
            are skipped.
        system_message: Text placed at the top of the prompt; omitted from the
            prompt when empty.
        max_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature passed to ``model.generate``.
        top_p: Nucleus-sampling threshold passed to ``model.generate``.

    Yields:
        The reply accumulated so far, once per decoded chunk, so the UI can
        update incrementally.
    """
    # Local imports keep this block self-contained; `transformers` is already
    # a dependency of this file.
    from threading import Thread

    from transformers import TextIteratorStreamer

    # Render the conversation as a plain-text transcript that ends with an
    # open "Assistant:" turn for the model to complete.
    # NOTE(review): a plain transcript is used because it is not visible here
    # whether the base checkpoint's tokenizer ships a chat template — confirm
    # the prompt format the adapter was trained on.
    prompt_parts = [f"{system_message}\n\n"] if system_message else []
    for turn in history:
        if turn[0]:
            prompt_parts.append(f"User: {turn[0]}\n")
        if turn[1]:
            prompt_parts.append(f"Assistant: {turn[1]}\n")
    prompt_parts.append(f"User: {message}\nAssistant:")
    prompt = "".join(prompt_parts)

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # skip_prompt=True makes the streamer emit only newly generated text.
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=int(max_tokens),  # UI sliders may deliver a float
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    # `generate` blocks until it finishes, so it runs in a worker thread while
    # this generator drains the streamer.
    worker = Thread(target=model.generate, kwargs=generation_kwargs, daemon=True)
    worker.start()

    response = ""
    for chunk in streamer:
        if chunk:  # ignore empty chunks
            response += chunk
            yield response

    worker.join()
47
 
48
# Chat UI. `respond` handles each message; the widgets listed in
# `additional_inputs` supply its system_message, max_tokens, temperature and
# top_p arguments, in that order.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(label="System message", value="You are a friendly Chatbot."),
        gr.Slider(1, 2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()