Dread2Poor committed on
Commit
c6d1330
·
verified ·
1 Parent(s): e1d5f80

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -9
app.py CHANGED
@@ -17,21 +17,84 @@ if not os.path.exists(MODEL_PATH):
17
 
18
  llm = Llama(
19
  model_path=MODEL_PATH,
20
- n_ctx=4096,
21
  n_threads=2,
22
  n_threads_batch=2,
23
  verbose=False,
24
  )
25
 
26
- def generate_response(message, history):
27
- prompt = f"{message}"
28
- output = llm(prompt, max_tokens=128, echo=False)
 
 
 
 
 
 
 
 
 
 
 
 
29
  return output["choices"][0]["text"].strip()
30
 
31
- iface = gr.ChatInterface(
32
- fn=generate_response,
33
- title="llama.cpp Chat",
34
- description="Chat with a GGUF model.",
35
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  iface.launch()
 
17
 
18
# Load the GGUF model a single time at import; every request reuses this handle.
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=4096,  # context window, in tokens
    n_threads=2,  # CPU threads for generation
    n_threads_batch=2,  # CPU threads for prompt batching
    verbose=False,  # keep llama.cpp's startup log quiet
)
25
 
26
def generate_response(message, history, system_prompt, max_tokens, temperature, top_p):
    """Generate one chat turn with the local llama.cpp model.

    Args:
        message: The user's new message text.
        history: Prior conversation as a list of ``{"role", "content"}`` dicts
            (the ``gr.State`` value; Chatbot is in ``type="messages"`` mode).
        system_prompt: System instruction prepended to every prompt.
        max_tokens: Generation cap passed to the model.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.

    Returns:
        A ``(chatbot_messages, state)`` pair — the same updated message list
        twice, because both event handlers declare ``outputs=[chatbot, state]``.
    """
    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    # Flatten the structured messages into a simple role-prefixed text prompt.
    prompt = "".join(f"{m['role'].capitalize()}: {m['content']}\n" for m in messages)

    output = llm(
        prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        echo=False,
    )
    reply = output["choices"][0]["text"].strip()

    # BUG FIX: the previous version returned only the bare reply string, but
    # both handlers bind outputs=[chatbot, state] — the Chatbot never received
    # a valid messages list and history was never extended, so the chat had no
    # memory. Return the updated conversation for both outputs.
    new_history = history + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": reply},
    ]
    return new_history, new_history
42
 
43
# Build the chat UI. Generation-control widgets are declared first so they can
# be passed as extra inputs to the shared event handler.
with gr.Blocks() as iface:
    system_prompt_input = gr.Textbox(
        label="System Prompt",
        value="You are a helpful assistant.",
        lines=3,
    )
    max_tokens_slider = gr.Slider(
        label="Max Tokens", minimum=32, maximum=4096, value=128, step=32
    )
    temperature_slider = gr.Slider(
        label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1
    )
    top_p_slider = gr.Slider(
        label="Top P", minimum=0.1, maximum=1.0, value=0.9, step=0.1
    )
    chatbot = gr.Chatbot(type="messages")
    message = gr.Textbox(label="Message")
    send_button = gr.Button("Send")
    state = gr.State([])  # conversation history carried between turns

    # The button click and the textbox submit use identical wiring, so the
    # argument lists are defined once and shared.
    handler_inputs = [
        message,
        state,
        system_prompt_input,
        max_tokens_slider,
        temperature_slider,
        top_p_slider,
    ]
    handler_outputs = [chatbot, state]

    send_button.click(generate_response, inputs=handler_inputs, outputs=handler_outputs)
    message.submit(generate_response, inputs=handler_inputs, outputs=handler_outputs)

iface.launch()