Dread2Poor committed on
Commit ba57f6d · verified · 1 Parent(s): 1dd05a0

Update app.py

Files changed (1)
  1. app.py +40 -23
app.py CHANGED
@@ -7,45 +7,62 @@ MODEL_REPO = "DreadPoor/Irixium-12B-Model_Stock-Q4_K_M-GGUF"
 MODEL_FILENAME = "irixium-12b-model_stock-q4_k_m.gguf"
 MODEL_PATH = "./" + MODEL_FILENAME
 
-if not os.path.exists(MODEL_PATH):
-    hf_hub_download(
-        repo_id=MODEL_REPO,
-        filename=MODEL_FILENAME,
-        repo_type="model",
-        local_dir=".",
-    )
+try:
+    if not os.path.exists(MODEL_PATH):
+        hf_hub_download(
+            repo_id=MODEL_REPO,
+            filename=MODEL_FILENAME,
+            repo_type="model",
+            local_dir=".",
+        )
 
-llm = Llama(
-    model_path=MODEL_PATH,
-    n_ctx=4096,
-    n_threads=2,
-    n_threads_batch=2,
-    verbose=False,
-)
+    llm = Llama(
+        model_path=MODEL_PATH,
+        n_ctx=4096,
+        n_threads=2,
+        n_threads_batch=2,
+        verbose=False,
+    )
+except Exception as e:
+    print(f"Error loading model: {e}")
+    llm = None
 
 DEFAULT_SYSTEM_PROMPT = "You are Doll, a smart yet silly, obliging and affable slave, your duty is to serve while caring for your master."
 
-def generate_response(message, history, system_prompt=DEFAULT_SYSTEM_PROMPT):
+def generate_response(message, history, system_prompt=DEFAULT_SYSTEM_PROMPT, temperature=0.7, top_p=0.9):
+    if llm is None:
+        return "Model failed to load."
     messages = [{"role": "system", "content": system_prompt}]
     for human, assistant in history:
         messages.append({"role": "user", "content": human})
         messages.append({"role": "assistant", "content": assistant})
     messages.append({"role": "user", "content": message})
-
     prompt = "".join([f"{m['role'].capitalize()}: {m['content']}\n" for m in messages])
-
-    output = llm(prompt, max_tokens=1024, echo=False)
-    return output["choices"][0]["text"].strip()
+    try:
+        output = llm(
+            prompt,
+            max_tokens=1024,
+            echo=False,
+            temperature=temperature,
+            top_p=top_p,
+        )
+        return output["choices"][0]["text"].strip()
+    except Exception as e:
+        return f"Error during inference: {e}"
 
-def chat(message, history, system_prompt):
-    return generate_response(message, history, system_prompt)
+def chat(message, history, system_prompt, temperature, top_p):
+    return generate_response(message, history, system_prompt, temperature, top_p)
 
 iface = gr.ChatInterface(
     fn=chat,
     title="llama.cpp Chat",
     description="Test a GGUF model. Chats arent persistent",
-    additional_inputs=[gr.Textbox(label="System Prompt", value=DEFAULT_SYSTEM_PROMPT, lines=3)],
-    cache_examples=False
+    additional_inputs=[
+        gr.Textbox(label="System Prompt", value=DEFAULT_SYSTEM_PROMPT, lines=3),
+        gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.7, step=0.1),
+        gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.9, step=0.1),
+    ],
+    cache_examples=False,
 )
 
 iface.launch()
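
For context: the hunk starts at line 7, so the top of app.py is not shown here. Based on the names used in the diff (os.path.exists, hf_hub_download, Llama, gr.ChatInterface) and the MODEL_REPO constant visible in the hunk header, the file presumably opens with roughly the following imports. This is a sketch inferred from the visible code, not part of the commit.

# Assumed header of app.py (lines 1-6, not shown in this hunk) -- inferred, not verbatim
import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

MODEL_REPO = "DreadPoor/Irixium-12B-Model_Stock-Q4_K_M-GGUF"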