Dread2Poor committed
Commit ee85cfc · verified · 1 Parent(s): fa6cf2a

Update app.py

Files changed (1)
  app.py +60 -76
app.py CHANGED
@@ -1,77 +1,61 @@
-from llama_cpp import Llama
-from huggingface_hub import hf_hub_download
 import gradio as gr
-
-hf_hub_download(repo_id="Dread2Poor/Killamanjaro-8B-Model_Stock-Q4_K_M-GGUF", filename="killamanjaro-8b-model_stock-q4_k_m.gguf", repo_type="model", local_dir=".")
-
-# Initialize the model
-llm = Llama(model_path="./killamanjaro-8b-model_stock-q4_k_m.gguf", n_threads=2)
-
-# System prompt
-system_prompt = "You are my mother. You are caring, obliging and affable."
-
-# Chat history
-history = [{"role": "system", "content": system_prompt}]
-
-def format_prompt(history):
-    """
-    Format the chat history into a prompt for the model.
-    """
-    prompt = ""
-    for turn in history:
-        if turn["role"] == "system":
-            prompt += f"<|system|>\n{turn['content']}\n<|end|>\n"
-        elif turn["role"] == "user":
-            prompt += f"<|user|>\n{turn['content']}\n<|end|>\n"
-        elif turn["role"] == "assistant":
-            prompt += f"<|assistant|>\n{turn['content']}\n<|end|>\n"
-    prompt += "<|assistant|>\n"  # Model expects the assistant's reply next
-    return prompt
-
-def chat_with_model(user_input):
-    """
-    Engage in a chat with the model using the provided user input.
-    """
-    global history
-
-    # Append user input to history
-    history.append({"role": "user", "content": user_input})
-
-    # Format the prompt with history
-    prompt = format_prompt(history)
-
-    # Generate response
-    response = llm(prompt, max_tokens=150, stop=["<|end|>"])
-
-    # Extract assistant's reply
-    assistant_reply = response['choices'][0]['text'].strip()
-
-    # Append assistant's reply to history
-    history.append({"role": "assistant", "content": assistant_reply})
-
-    return assistant_reply
-
-def reset_history():
-    """
-    Reset the chat history to the initial state.
-    """
-    global history
-    history = [{"role": "system", "content": system_prompt}]
-    return gr.update(value=""), gr.update(value=[])
-
-# Create Gradio interface
-with gr.Blocks() as demo:
-    chatbot = gr.Chatbot()
-    with gr.Row():
-        with gr.Column(scale=4):
-            user_input = gr.Textbox(show_label=False, placeholder="Type your message here...")
-        with gr.Column(scale=1):
-            submit_btn = gr.Button("Send")
-    with gr.Row():
-        clear_btn = gr.Button("Clear Chat")
-
-    submit_btn.click(chat_with_model, inputs=user_input, outputs=chatbot)
-    clear_btn.click(reset_history, inputs=[], outputs=[user_input, chatbot])
-
-# Launch the interface
-demo.launch()
 
 
 
 import gradio as gr
+from llama_cpp import Llama
+import os
+import requests
+import tempfile
+
+MODEL_PATH = "model.gguf"  # Default model name
+MODEL_URL = None  # Optional URL to download the model from
+
+def download_model(url, save_path):
+    """Downloads a file from a URL."""
+    response = requests.get(url, stream=True)
+    response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
+
+    with open(save_path, "wb") as file:
+        for chunk in response.iter_content(chunk_size=8192):
+            file.write(chunk)
+
+def load_model(model_path):
+    """Loads a GGUF model using llama-cpp-python."""
+    try:
+        llm = Llama(model_path)
+        return llm
+    except Exception as e:
+        return f"Error loading model: {e}"
+
+def generate_response(prompt, model):
+    """Generates a response using the loaded model."""
+    if isinstance(model, str):  # if there was an error loading the model
+        return model
+    try:
+        output = model(prompt, max_tokens=256)  # Adjust max_tokens as needed
+        return output["choices"][0]["text"]
+    except Exception as e:
+        return f"Error generating response: {e}"
+
+# Download model if URL is provided and model doesn't exist
+if MODEL_URL and not os.path.exists(MODEL_PATH):
+    print(f"Downloading model from {MODEL_URL}...")
+    try:
+        download_model(MODEL_URL, MODEL_PATH)
+        print("Model downloaded successfully.")
+    except Exception as e:
+        print(f"Error downloading model: {e}")
+
+# Load the model
+llm = load_model(MODEL_PATH)
+
+def inference(prompt):
+    return generate_response(prompt, llm)
+
+# Gradio Interface
+iface = gr.Interface(
+    fn=inference,
+    inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
+    outputs=gr.Textbox(lines=10),
+    title="llama.cpp CPU Inference",
+    description="Generate text using a GGUF model with llama.cpp on CPU.",
+)
+
+iface.launch()
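
Note: with MODEL_URL left as None, the new script expects a GGUF file to already exist at MODEL_PATH ("model.gguf"). One way to supply it for local testing is to pull the same GGUF the previous version of app.py downloaded and move it to that default path; this is an illustrative sketch only, not part of the commit:

# Illustrative only: fetch the GGUF the previous app.py used via huggingface_hub,
# then place it where the updated script looks for it (MODEL_PATH = "model.gguf").
from huggingface_hub import hf_hub_download
import os

hf_hub_download(
    repo_id="Dread2Poor/Killamanjaro-8B-Model_Stock-Q4_K_M-GGUF",
    filename="killamanjaro-8b-model_stock-q4_k_m.gguf",
    repo_type="model",
    local_dir=".",
)
os.replace("killamanjaro-8b-model_stock-q4_k_m.gguf", "model.gguf")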