# (Hugging Face Spaces page residue removed; this file is the Space's app script.)
import gradio as gr
from llama_cpp import Llama

# Path to the GGUF model used by this Space.
# NOTE(review): this looks like a Hugging Face repo id, not a local file path —
# llama_cpp's Llama(model_path=...) expects a .gguf file on disk. Confirm the
# model file actually exists at this path (or download it first, e.g. with
# huggingface_hub), otherwise model loading will fail at startup.
MODEL_PATH = "Futuresony/gemma2-2b-gguf-q4_k_m"

# Load the model once at import time (n_ctx: context window size,
# n_threads: CPU threads used for inference).
llm = Llama(model_path=MODEL_PATH, n_ctx=2048, n_threads=4, verbose=True)
def format_prompt(user_message):
    """Wrap *user_message* in the Alpaca-style instruction template.

    The trailing "### Response:" cues the model to begin answering;
    generation is stopped at the next "###" (see the stop list in respond()).

    Args:
        user_message: The raw user question/message.

    Returns:
        The full prompt string fed to the model.
    """
    return f"""### Instruction:
{user_message}
### Response:"""
def respond(user_message, chat_history):
    """Chat handler: generate a reply and append the exchange to history.

    Args:
        user_message: Text submitted from the textbox.
        chat_history: List of (user, assistant) tuples held in gr.State;
            mutated in place by appending the new exchange.

    Returns:
        ("", chat_history): the empty string clears the textbox, and the
        updated history re-renders the Chatbot component.
    """
    prompt = format_prompt(user_message)
    # Stop at "###" so the model doesn't run on into a new Instruction block.
    output = llm(prompt, max_tokens=300, stop=["###"])
    response = output["choices"][0]["text"].strip()
    chat_history.append((user_message, response))
    return "", chat_history
# Gradio UI: minimal chat layout; submitting the textbox triggers respond().
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 DStv AI Assistant (Offline - GGUF)")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Ask your question...")
    state = gr.State([])
    # respond() returns ("", history): the first output clears the textbox,
    # the second refreshes the chatbot. `state` and `chatbot` share the same
    # list object, so the in-place append in respond() keeps them in sync.
    msg.submit(respond, [msg, state], [msg, chatbot])

demo.launch()