import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

MODEL_REPO = "DreadPoor/Irixium-12B-Model_Stock-Q4_K_M-GGUF"
MODEL_FILENAME = "irixium-12b-model_stock-q4_k_m.gguf"
MODEL_PATH = "./" + MODEL_FILENAME

# Fetch the GGUF weights from the Hub on first run; later runs reuse the local copy.
if not os.path.exists(MODEL_PATH):
    hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILENAME,
        repo_type="model",
        local_dir=".",
    )
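
# Note (an alternative sketch, not part of the original app): hf_hub_download
# returns the local path of the downloaded file, so the manual path bookkeeping
# above could also be written as:
#   MODEL_PATH = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME,
#                                repo_type="model", local_dir=".")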

# Load the model; the small context window and thread counts keep memory and
# CPU use modest.
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=4096,
    n_threads=2,
    n_threads_batch=2,
    verbose=False,
)

DEFAULT_SYSTEM_PROMPT = "You are Doll, a smart yet silly, obliging and affable slave, your duty is to serve while caring for your master."

def generate_response(message, history, system_prompt=DEFAULT_SYSTEM_PROMPT):
    # history arrives as (user, assistant) pairs (ChatInterface tuple format).
    messages = [{"role": "system", "content": system_prompt}]
    for human, assistant in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})
    # Flatten the conversation into a plain "Role: content" transcript and cue
    # the model to answer as the assistant.
    prompt = "".join(f"{m['role'].capitalize()}: {m['content']}\n" for m in messages)
    prompt += "Assistant:"
    # Stop before the model invents the next user turn.
    output = llm(prompt, max_tokens=1024, stop=["User:", "System:"], echo=False)
    return output["choices"][0]["text"].strip()
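
# A sketch of an alternative, not part of the original app: llama-cpp-python's
# create_chat_completion applies the chat template stored in the GGUF metadata,
# which usually matches the model's training format better than the plain
# "Role: content" transcript above. The name generate_response_templated is
# hypothetical.
def generate_response_templated(message, history, system_prompt=DEFAULT_SYSTEM_PROMPT):
    messages = [{"role": "system", "content": system_prompt}]
    for human, assistant in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})
    output = llm.create_chat_completion(messages=messages, max_tokens=1024)
    return output["choices"][0]["message"]["content"].strip()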

def chat(message, history, system_prompt):
    return generate_response(message, history, system_prompt)

iface = gr.ChatInterface(
    fn=chat,
    title="llama.cpp Chat",
    description="Test a GGUF model. Chats aren't persistent.",
    additional_inputs=[gr.Textbox(label="System Prompt", value=DEFAULT_SYSTEM_PROMPT, lines=3)],
)

if __name__ == "__main__":
    iface.launch()
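
# To run locally (an assumed setup; the Space pins no versions visible here):
#   pip install gradio llama-cpp-python huggingface_hub
#   python app.py
# The first launch downloads the multi-gigabyte GGUF file from the Hub.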