JimmyK300 committed
Commit 76a9806 · verified · 1 Parent(s): a0adab2

Update app.py

Files changed (1): app.py (+16, -9)
app.py CHANGED
@@ -1,15 +1,15 @@
 import gradio as gr
-from ctransformers import AutoModelForCausalLM
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
 
-# Path to your downloaded GGUF model (change this if needed)
-MODEL_PATH = "qwen2.5-0.5b-instruct-q2_k.gguf"
+# Model name
+MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct-GGUF"
 
-# Load model using ctransformers
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_PATH, model_type="qwen2", gpu_layers=30  # Adjust `gpu_layers` for performance
-)
+# Load tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")
 
-def respond(message, history, system_message, max_tokens, temperature, top_p):
+def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
     messages = [{"role": "system", "content": system_message}]
 
     # Add chat history to messages
@@ -21,9 +21,16 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
 
     messages.append({"role": "user", "content": message})
 
+    # Tokenize input
+    inputs = tokenizer(message, return_tensors="pt").to("cpu")
+
     # Generate response
-    response = model(messages, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p)
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs, max_length=max_tokens, temperature=temperature, top_p=top_p
+        )
 
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return response
 
 # Define Gradio interface
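A few things in the new respond() are worth flagging: it assembles a messages list (system prompt plus history) but then tokenizes only the raw message, pins the inputs to CPU while device_map="auto" may place the model elsewhere, and passes max_length (a cap on total sequence length) where max_new_tokens is likely intended. A minimal sketch of feeding the assembled conversation through the tokenizer's chat template instead, building on the names defined in the diff above; this is not part of the commit, and it assumes the checkpoint ships a chat template, as the Qwen2.5-Instruct models do:

# Sketch (not part of this commit): apply the full conversation via the
# tokenizer's chat template instead of tokenizing only the latest message.
input_ids = tokenizer.apply_chat_template(
    messages,                    # system prompt + prior turns + current user message
    add_generation_prompt=True,  # append the assistant-turn prefix
    return_tensors="pt",
).to(model.device)               # follow the model rather than pinning to CPU

with torch.no_grad():
    outputs = model.generate(
        input_ids,
        max_new_tokens=max_tokens,  # cap generated tokens, not total length
        temperature=temperature,
        top_p=top_p,
        do_sample=True,             # temperature/top_p only take effect when sampling
    )

# Decode only the newly generated tokens so the prompt is not echoed back
response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)

The diff's trailing context stops at the interface definition. The respond() signature here matches the stock Gradio chat template, where the wiring typically looks like the sketch below; the widget labels and default values are assumptions, not taken from this commit:

# Sketch (assumed wiring, not shown in this diff): a ChatInterface whose
# additional_inputs line up with respond()'s extra parameters, in order.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)

if __name__ == "__main__":
    demo.launch()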