JimmyK300 committed
Commit 6f0c766 · verified · 1 Parent(s): 7346e43

Update app.py

Files changed (1)
  1. app.py +17 -27
app.py CHANGED
@@ -1,36 +1,26 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch
 
-# Model name
-MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct-GGUF"
+from ctransformers import AutoModelForCausalLM
 
-# Load tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")
+# Model path or name
+MODEL_PATH = "Qwen/Qwen2.5-0.5B-Instruct-GGUF"
 
-def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
-    messages = [{"role": "system", "content": system_message}]
-
-    # Add chat history to messages
+# Load the model
+model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, model_type="qwen", device="cpu")
+
+def respond(message, history, system_message, max_tokens, temperature, top_p):
+    # Prepare the prompt with system message and history
+    prompt = system_message + "\n"
     for user_msg, assistant_msg in history:
-        if user_msg:
-            messages.append({"role": "user", "content": user_msg})
-        if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
-
-    messages.append({"role": "user", "content": message})
-
-    # Tokenize input
-    inputs = tokenizer(message, return_tensors="pt").to("cpu")
-
+        prompt += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
+    prompt += f"User: {message}\nAssistant:"
+
     # Generate response
-    with torch.no_grad():
-        outputs = model.generate(
-            **inputs, max_length=max_tokens, temperature=temperature, top_p=top_p
-        )
-
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    response = model(
+        prompt,
+        max_new_tokens=max_tokens,
+        temperature=temperature,
+        top_p=top_p,
+    )
     return response
 
 # Define Gradio interface
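
A note on the new loading step: Qwen/Qwen2.5-0.5B-Instruct-GGUF is a repository holding several quantized .gguf files, and ctransformers can select one explicitly via model_file. The sketch below is not part of the commit: the file name is hypothetical (check the repo's file listing), ctransformers' README documents gpu_layers rather than a device keyword, and whether "qwen" is a supported model_type for the pinned library version is worth verifying.

from ctransformers import AutoModelForCausalLM

# Sketch of an explicit GGUF load; assumptions are flagged inline
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-0.5B-Instruct-GGUF",
    model_file="qwen2.5-0.5b-instruct-q4_k_m.gguf",  # hypothetical file name
    model_type="qwen",  # value taken from the commit; confirm it is supported
    gpu_layers=0,       # CPU-only inference
)
print(model("Hello!", max_new_tokens=32))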
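
Because respond returns only after generation completes, the chat UI stays blank until the full reply is ready. As a possible refinement (not in the commit), ctransformers accepts stream=True and gr.ChatInterface can consume a generator function, so a streaming variant of respond might look like this sketch:

def respond_stream(message, history, system_message, max_tokens, temperature, top_p):
    # Same flat prompt format as the committed respond()
    prompt = system_message + "\n"
    for user_msg, assistant_msg in history:
        prompt += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
    prompt += f"User: {message}\nAssistant:"

    # Yield the accumulated reply so the UI updates as tokens arrive
    partial = ""
    for token in model(prompt, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p, stream=True):
        partial += token
        yield partial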
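
The diff cuts off at the # Define Gradio interface comment, so the UI wiring is not shown. For context, here is a minimal sketch of how respond's signature is usually connected in the standard Gradio ChatInterface template (labels and default values below are assumptions, not from the commit):

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)

if __name__ == "__main__":
    demo.launch()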