# (Hugging Face Spaces page residue removed; this file is the Space's app script.)
import gradio as gr
from llama_cpp import Llama

# Path to the GGUF model used by this Space.
# NOTE(review): this looks like a Hugging Face repo id, not a local file path —
# llama_cpp's Llama(model_path=...) expects a .gguf file on disk. Confirm the
# model file actually exists at this path (or download it first, e.g. with
# huggingface_hub), otherwise model loading will fail at startup.
MODEL_PATH = "Futuresony/gemma2-2b-gguf-q4_k_m"

# Load the model once at import time (n_ctx: context window size,
# n_threads: CPU threads used for inference).
llm = Llama(model_path=MODEL_PATH, n_ctx=2048, n_threads=4, verbose=True)
def format_prompt(user_message):
    """Wrap *user_message* in the Alpaca-style instruction template.

    The trailing "### Response:" cues the model to begin answering;
    generation is stopped at the next "###" (see the stop list in respond()).

    Args:
        user_message: The raw user question/message.

    Returns:
        The full prompt string fed to the model.
    """
    return f"""### Instruction:
{user_message}
### Response:"""
def respond(user_message, chat_history):
    """Chat handler: generate a reply and append the exchange to history.

    Args:
        user_message: Text submitted from the textbox.
        chat_history: List of (user, assistant) tuples held in gr.State;
            mutated in place by appending the new exchange.

    Returns:
        ("", chat_history): the empty string clears the textbox, and the
        updated history re-renders the Chatbot component.
    """
    prompt = format_prompt(user_message)
    # Stop at "###" so the model doesn't run on into a new Instruction block.
    output = llm(prompt, max_tokens=300, stop=["###"])
    response = output["choices"][0]["text"].strip()
    chat_history.append((user_message, response))
    return "", chat_history
# Gradio UI: minimal chat layout; submitting the textbox triggers respond().
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 DStv AI Assistant (Offline - GGUF)")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Ask your question...")
    state = gr.State([])
    # respond() returns ("", history): the first output clears the textbox,
    # the second refreshes the chatbot. `state` and `chatbot` share the same
    # list object, so the in-place append in respond() keeps them in sync.
    msg.submit(respond, [msg, state], [msg, chatbot])

demo.launch()