Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,15 +1,15 @@
|
|
1 |
import gradio as gr
|
2 |
-
from
|
3 |
-
import torch
|
4 |
|
5 |
-
#
|
6 |
-
|
7 |
|
8 |
-
# Load
|
9 |
-
|
10 |
-
|
|
|
11 |
|
12 |
-
def respond(message, history
|
13 |
messages = [{"role": "system", "content": system_message}]
|
14 |
|
15 |
# Add chat history to messages
|
@@ -21,16 +21,9 @@ def respond(message, history: list[tuple[str, str]], system_message, max_tokens,
|
|
21 |
|
22 |
messages.append({"role": "user", "content": message})
|
23 |
|
24 |
-
# Tokenize input
|
25 |
-
inputs = tokenizer(message, return_tensors="pt").to("cpu")
|
26 |
-
|
27 |
# Generate response
|
28 |
-
|
29 |
-
outputs = model.generate(
|
30 |
-
**inputs, max_length=max_tokens, temperature=temperature, top_p=top_p
|
31 |
-
)
|
32 |
|
33 |
-
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
34 |
return response
|
35 |
|
36 |
# Define Gradio interface
|
|
|
1 |
import gradio as gr
|
2 |
+
from ctransformers import AutoModelForCausalLM
|
|
|
3 |
|
4 |
+
# Path to your downloaded GGUF model (change this if needed)
|
5 |
+
MODEL_PATH = "qwen2.5-0.5b-instruct-q2_k.gguf"
|
6 |
|
7 |
+
# Load model using ctransformers
|
8 |
+
model = AutoModelForCausalLM.from_pretrained(
|
9 |
+
MODEL_PATH, model_type="qwen2", gpu_layers=30 # Adjust `gpu_layers` for performance
|
10 |
+
)
|
11 |
|
12 |
+
def respond(message, history, system_message, max_tokens, temperature, top_p):
|
13 |
messages = [{"role": "system", "content": system_message}]
|
14 |
|
15 |
# Add chat history to messages
|
|
|
21 |
|
22 |
messages.append({"role": "user", "content": message})
|
23 |
|
|
|
|
|
|
|
24 |
# Generate response
|
25 |
+
response = model(messages, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p)
|
|
|
|
|
|
|
26 |
|
|
|
27 |
return response
|
28 |
|
29 |
# Define Gradio interface
|