# FutureX / app.py
import gradio as gr
from llama_cpp import Llama

# Path to the GGUF model inside the Space. Note: Llama() expects a local
# *.gguf file path; the value below looks like a Hub repo id, so point it at
# the actual model file (or see the from_pretrained sketch below).
MODEL_PATH = "Futuresony/gemma2-2b-gguf-q4_k_m"

# Load the model with a 2048-token context window on 4 CPU threads
llm = Llama(model_path=MODEL_PATH, n_ctx=2048, n_threads=4, verbose=True)
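
# Alternative (a sketch, assuming the id above is a Hugging Face Hub repo
# hosting a GGUF file): llama-cpp-python can download the weights directly.
# The filename glob is an assumption -- check the repo's actual file list.
# llm = Llama.from_pretrained(
#     repo_id="Futuresony/gemma2-2b-gguf-q4_k_m",
#     filename="*q4_k_m.gguf",  # hypothetical pattern; match the real file
#     n_ctx=2048,
#     n_threads=4,
#     verbose=True,
# )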

# Wrap the user's message in an Alpaca-style instruction template
def format_prompt(user_message):
    return f"""### Instruction:
{user_message}
### Response:"""
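
# For example, format_prompt("What channels does DStv offer?") produces:
#
# ### Instruction:
# What channels does DStv offer?
# ### Response: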

# Chat handler: generate a reply and append the turn to the history
def respond(user_message, chat_history):
    prompt = format_prompt(user_message)
    # Stop at "###" so generation ends before the model starts a new block
    output = llm(prompt, max_tokens=300, stop=["###"])
    response = output["choices"][0]["text"].strip()
    chat_history.append((user_message, response))
    # Clear the textbox and return the updated history for the Chatbot
    return "", chat_history

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 DStv AI Assistant (Offline - GGUF)")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Ask your question...")
    state = gr.State([])  # per-session chat history
    # On Enter: respond(msg, state) -> (cleared textbox, updated chat)
    msg.submit(respond, [msg, state], [msg, chatbot])

demo.launch()