import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os
MODEL_REPO = "DreadPoor/Irixium-12B-Model_Stock-Q4_K_M-GGUF"
MODEL_FILENAME = "irixium-12b-model_stock-q4_k_m.gguf"
MODEL_PATH = "./" + MODEL_FILENAME

if not os.path.exists(MODEL_PATH):
    hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILENAME,
        repo_type="model",
        local_dir=".",
    )

# CPU inference settings: 4K context window, two threads for both
# generation and prompt batching.
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=4096,
    n_threads=2,
    n_threads_batch=2,
    verbose=False,
)
DEFAULT_SYSTEM_PROMPT = "You are Doll, a smart yet silly, obliging and affable slave, your duty is to serve while caring for your master."

def generate_response(message, history, system_prompt=DEFAULT_SYSTEM_PROMPT):
    # Rebuild the conversation: system prompt first, then the alternating
    # user/assistant turns Gradio keeps in `history`.
    messages = [{"role": "system", "content": system_prompt}]
    for human, assistant in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})
    # Flatten to a plain "Role: content" transcript, ending with an
    # "Assistant:" cue so the model completes the assistant turn rather
    # than continuing the user's.
    prompt = "".join(f"{m['role'].capitalize()}: {m['content']}\n" for m in messages)
    prompt += "Assistant:"
    output = llm(prompt, max_tokens=1024, echo=False)
    return output["choices"][0]["text"].strip()

def chat(message, history, system_prompt):
    return generate_response(message, history, system_prompt)
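
# Optional streaming sketch: gr.ChatInterface accepts a generator function,
# and llama-cpp-python streams tokens when called with stream=True. To try
# it, pass fn=chat_stream below instead of fn=chat. `chat_stream` is an
# illustrative name, not part of the original app.
def chat_stream(message, history, system_prompt):
    messages = [{"role": "system", "content": system_prompt}]
    for human, assistant in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})
    prompt = "".join(f"{m['role'].capitalize()}: {m['content']}\n" for m in messages)
    prompt += "Assistant:"
    partial = ""
    # Each chunk carries the next text fragment; yield the running string
    # so Gradio redraws the reply incrementally.
    for chunk in llm(prompt, max_tokens=1024, echo=False, stream=True):
        partial += chunk["choices"][0]["text"]
        yield partial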

iface = gr.ChatInterface(
    fn=chat,
    title="llama.cpp Chat",
    description="Test a GGUF model. Chats aren't persistent.",
    additional_inputs=[gr.Textbox(label="System Prompt", value=DEFAULT_SYSTEM_PROMPT, lines=3)],
)
iface.launch()