Futuresony committed
Commit ba09697 · verified · 1 Parent(s): fcd015e

Update app.py

Files changed (1)
  1. app.py +18 -15
app.py CHANGED
@@ -5,31 +5,33 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel
 from huggingface_hub import login
 
-# Authenticate with Hugging Face using secret HF_TOKEN
+# Login using HF token from secrets
 hf_token = os.environ.get("HF_TOKEN")
 if not hf_token:
-    raise RuntimeError("Missing HF_TOKEN in secrets. Please add it in your Space settings.")
-
+    raise RuntimeError("Missing HF_TOKEN in secrets.")
 login(token=hf_token)
 
-# Load base model and LoRA adapter
-base_model_id = "unsloth/gemma-2-9b"  # Or your base model
-lora_model_id = "Futuresony/future_12_10_2024"  # Your LoRA fine-tuned model
+# Base and LoRA model paths
+base_model_id = "unsloth/gemma-2-9b"
+lora_model_id = "Futuresony/future_12_10_2024"
 
-# Load tokenizer and model
+# Load tokenizer and base model
 tokenizer = AutoTokenizer.from_pretrained(base_model_id)
-base_model = AutoModelForCausalLM.from_pretrained(base_model_id, torch_dtype=torch.float16, device_map="auto")
-model = PeftModel.from_pretrained(base_model, lora_model_id)
+base_model = AutoModelForCausalLM.from_pretrained(
+    base_model_id,
+    torch_dtype=torch.float16,
+    device_map="auto"
+)
 
-# Ensure model is in evaluation mode
+# Load LoRA weights
+model = PeftModel.from_pretrained(base_model, lora_model_id)
 model.eval()
 
+# Chat function
 def generate_response(message, history, system_message, max_tokens, temperature, top_p):
     prompt = system_message + "\n\n"
-
     for user_input, bot_response in history:
         prompt += f"User: {user_input}\nAssistant: {bot_response}\n"
-
     prompt += f"User: {message}\nAssistant:"
 
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
@@ -46,7 +48,7 @@ def generate_response(message, history, system_message, max_tokens, temperature,
     final_response = response.split("Assistant:")[-1].strip()
     return final_response
 
-# Gradio ChatInterface
+# Gradio interface
 demo = gr.ChatInterface(
     fn=generate_response,
     additional_inputs=[
@@ -55,9 +57,10 @@ demo = gr.ChatInterface(
         gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
     ],
-    title="LoRA AI Chat Assistant",
-    description="Chat with your fine-tuned model using LoRA adapter."
+    title="LoRA Chat Assistant (Gemma-2)",
+    description="Chat with your fine-tuned Gemma-2 LoRA model"
 )
 
 if __name__ == "__main__":
     demo.launch()
+
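
Note that the second hunk opens at the `final_response` line, so the generation call between `inputs = tokenizer(...)` and the response parsing is unchanged by this commit and not shown. As a hedged sketch only, that unchanged region presumably follows the standard transformers generate-and-decode pattern below; the exact arguments in the Space's app.py are not visible in this diff.

```python
# Hypothetical sketch of the unchanged generation step (NOT part of this commit).
# Relies on model, tokenizer, and inputs defined earlier in app.py, and on torch
# being imported at the top of the file (it is already used for torch.float16).
with torch.no_grad():
    outputs = model.generate(
        **inputs,                   # tokenized prompt tensors on model.device
        max_new_tokens=max_tokens,  # max-tokens setting passed by ChatInterface
        temperature=temperature,
        top_p=top_p,
        do_sample=True,             # sampling, so temperature/top_p take effect
    )
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
```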
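Since `generate_response` is a plain function, it can also be smoke-tested without launching the Gradio app. A hypothetical check, with message text and sampling values that are illustrative rather than taken from the commit:

```python
# Illustrative local test of the chat function; all argument values are made up.
reply = generate_response(
    message="Hello!",
    history=[],  # the code iterates (user, assistant) tuples supplied by Gradio
    system_message="You are a helpful assistant.",
    max_tokens=128,
    temperature=0.7,
    top_p=0.95,
)
print(reply)
```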