Dread2Poor committed on
Commit e1d5f80 · verified · 1 Parent(s): ffd3660

Update app.py

Files changed (1)
  1. app.py +31 -83
app.py CHANGED
@@ -1,89 +1,37 @@
  import gradio as gr
  from llama_cpp import Llama
  import os
- import requests

- MODEL_PATH = "irixium-12b-model_stock-q4_k_m.gguf"
- DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant."
-
- def download_model(url, save_path):
-     try:
-         response = requests.get(url, stream=True)
-         response.raise_for_status()
-         with open(save_path, "wb") as file:
-             for chunk in response.iter_content(chunk_size=8192):
-                 file.write(chunk)
-         return "Model downloaded successfully."
-     except Exception as e:
-         return f"Error: {e}"
-
- def load_model(model_path):
-     try:
-         llm = Llama(model_path, n_threads=2, n_gpu_layers=0) #force cpu, and set threads.
-         return llm
-     except Exception as e:
-         return f"Error: {e}"
-
- def apply_chat_template(model_name, messages, system_prompt):
-     model_name_lower = model_name.lower()
-     if "llama-2" in model_name_lower:
-         template = "<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n{user_message} [/INST] {assistant_message}</s>"
-     elif "mistral" in model_name_lower:
-         template = "<s>[INST] {user_message} [/INST] {assistant_message}</s>"
-     else:
-         template = "{user_message} {assistant_message}"
-
-     formatted_messages = []
-     for message in messages:
-         if message["role"] == "system":
-             system_prompt = message["content"]
-         elif message["role"] == "user":
-             formatted_messages.append(template.format(system_prompt=system_prompt, user_message=message["content"], assistant_message=""))
-         elif message["role"] == "assistant":
-             if formatted_messages:
-                 formatted_messages[-1] += message["content"]
-             else:
-                 formatted_messages.append(message["content"])
-     return "".join(formatted_messages)
-
- def generate_response(prompt, model):
-     if isinstance(model, str):
-         return model
-     try:
-         output = model(prompt, max_tokens=256)
-         return output["choices"][0]["text"].strip()
-     except Exception as e:
-         return f"Error: {e}"
-
- def inference(message, history, model_url, system_prompt):
-     if model_url and not os.path.exists(MODEL_PATH):
-         download_result = download_model(model_url, MODEL_PATH)
-         if "Error" in download_result:
-             return history + [{"role": "assistant", "content": download_result}], history
-
-     llm = load_model(MODEL_PATH)
-     if isinstance(llm, str):
-         return history + [{"role": "assistant", "content": llm}], history
-
-     messages = [{"role": "system", "content": system_prompt}]
-     for item in history:
-         messages.append(item)
-     messages.append({"role": "user", "content": message})
-
-     prompt = apply_chat_template(llm.model_path, messages, system_prompt)
-     response = generate_response(prompt, llm)
-     history.append({"role": "assistant", "content": response})
-     return history, history
-
- with gr.Blocks() as iface:
-     model_url_input = gr.Textbox(label="Model URL (GGUF)", placeholder="Enter GGUF model URL...")
-     system_prompt_input = gr.Textbox(label="System Prompt", value=DEFAULT_SYSTEM_PROMPT, lines=3)
-     chatbot = gr.Chatbot(type="messages")
-     message = gr.Textbox(label="Message")
-     send_button = gr.Button("Send")
-     state = gr.State([])
-
-     send_button.click(inference, inputs=[message, state, model_url_input, system_prompt_input], outputs=[chatbot, state])
-     message.submit(inference, inputs=[message, state, model_url_input, system_prompt_input], outputs=[chatbot, state])

  iface.launch()

  import gradio as gr
  from llama_cpp import Llama
+ from huggingface_hub import hf_hub_download
  import os

+ MODEL_REPO = "DreadPoor/Irixium-12B-Model_Stock-Q4_K_M-GGUF"
+ MODEL_FILENAME = "irixium-12b-model_stock-q4_k_m.gguf"
+ MODEL_PATH = "./" + MODEL_FILENAME
+
+ if not os.path.exists(MODEL_PATH):
+     hf_hub_download(
+         repo_id=MODEL_REPO,
+         filename=MODEL_FILENAME,
+         repo_type="model",
+         local_dir=".",
+     )
+
+ llm = Llama(
+     model_path=MODEL_PATH,
+     n_ctx=4096,
+     n_threads=2,
+     n_threads_batch=2,
+     verbose=False,
+ )
+
+ def generate_response(message, history):
+     prompt = f"{message}"
+     output = llm(prompt, max_tokens=128, echo=False)
+     return output["choices"][0]["text"].strip()
+
+ iface = gr.ChatInterface(
+     fn=generate_response,
+     title="llama.cpp Chat",
+     description="Chat with a GGUF model.",
+ )

  iface.launch()
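
Note: the new generate_response feeds the raw message to the model without applying a chat template or the conversation history. A minimal sketch of how the same llm instance could instead be driven through llama-cpp-python's create_chat_completion (this assumes the GGUF embeds a chat template; the function name and system prompt below are illustrative, not part of the commit):

# Sketch only, not part of app.py: reuse the `llm` loaded above.
def generate_response_chat(message, history):
    # Illustrative system prompt; swap in whatever the Space should use.
    messages = [{"role": "system", "content": "You are a helpful assistant."}]
    for turn in history:
        if isinstance(turn, dict):
            # "messages"-style history: {"role": ..., "content": ...}
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # tuple-style history: (user_message, assistant_message)
            user_msg, assistant_msg = turn
            messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    # create_chat_completion applies the model's embedded chat template.
    out = llm.create_chat_completion(messages=messages, max_tokens=128)
    return out["choices"][0]["message"]["content"].strip()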