import gradio as gr
from llama_cpp import Llama

# Hugging Face repo that hosts the GGUF model. Note: this is a repo ID, not a
# local file path, so it cannot be passed to Llama(model_path=...) directly.
MODEL_ID = "Futuresony/gemma2-2b-gguf-q4_k_m"

# Load the model. Llama.from_pretrained downloads the GGUF file from the Hub;
# the "*.gguf" glob assumes the repo contains exactly one GGUF file.
llm = Llama.from_pretrained(
    repo_id=MODEL_ID,
    filename="*.gguf",
    n_ctx=2048,
    n_threads=4,
    verbose=True,
)

# Wrap the user message in the Alpaca-style instruction format the model expects.
def format_prompt(user_message):
    return f"""### Instruction:
{user_message}

### Response:"""

# Chat handler: generate a reply, append the (user, bot) pair to the history,
# clear the textbox, and return the updated history for display and storage.
def respond(user_message, chat_history):
    prompt = format_prompt(user_message)
    output = llm(prompt, max_tokens=300, stop=["###"])
    response = output["choices"][0]["text"].strip()
    chat_history.append((user_message, response))
    return "", chat_history, chat_history

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 DStv AI Assistant (Offline - GGUF)")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Ask your question...")
    state = gr.State([])
    # Return the updated history to the State as well as the Chatbot, so the
    # update is explicit rather than relying on in-place mutation of the list.
    msg.submit(respond, [msg, state], [msg, chatbot, state])

demo.launch()
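# Dependencies (an assumed requirements.txt for this Space; pin versions as needed):
#   gradio
#   llama-cpp-python
#   huggingface_hub  # used by Llama.from_pretrained to fetch the GGUF file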