# NOTE: the original capture of this file included a Hugging Face web-page
# header ("Spaces: / Running / Running") that is not part of the program.
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os

# Model selection: a quantized (Q4_K_M) GGUF build of Irix-12B from the Hub.
MODEL_REPO = "mradermacher/Irix-12B-Model_Stock-i1-GGUF"
MODEL_FILENAME = "Irix-12B-Model_Stock.i1-Q4_K_M.gguf"
MODEL_PATH = "./" + MODEL_FILENAME

try:
    # Download the weights on first run. hf_hub_download returns the local
    # path it actually wrote, which is more reliable than reconstructing
    # the path by hand from local_dir + filename.
    if not os.path.exists(MODEL_PATH):
        MODEL_PATH = hf_hub_download(
            repo_id=MODEL_REPO,
            filename=MODEL_FILENAME,
            repo_type="model",
            local_dir=".",
        )
    llm = Llama(
        model_path=MODEL_PATH,
        n_ctx=4096,          # context window, in tokens
        n_threads=2,         # generation threads (small CPU host assumed)
        n_threads_batch=2,   # prompt-processing threads
        verbose=False,
    )
except Exception as e:
    # Boundary handler: keep the app importable even when download/load
    # fails; generate_response reports the failure to the user instead.
    print(f"Error loading model: {e}")
    llm = None

DEFAULT_SYSTEM_PROMPT = "You are Doll, a smart and diligent AI; Doll is a silly, obliging and affable servant, dedicated to serving and caring for your owner."
def generate_response(message, history, system_prompt=DEFAULT_SYSTEM_PROMPT, temperature=0.8, top_p=0.9):
    """Generate one assistant reply for *message* given the chat *history*.

    history is a list of (user, assistant) pairs as supplied by
    gr.ChatInterface. Returns the reply text, or an error string when the
    model failed to load or inference raised.
    """
    if llm is None:
        return "Model failed to load."
    # Rebuild the full conversation as role-tagged messages.
    messages = [{"role": "system", "content": system_prompt}]
    for human, assistant in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})
    # Flatten to a plain "Role: text" transcript. NOTE(review): this ignores
    # the model's own chat template; llm.create_chat_completion would apply
    # it — confirm before relying on output quality.
    prompt = "".join(f"{m['role'].capitalize()}: {m['content']}\n" for m in messages)
    try:
        output = llm(
            prompt,
            max_tokens=1024,
            echo=False,
            temperature=temperature,
            top_p=top_p,
            # Fix: without stop strings the model keeps extending the
            # transcript and writes the user's next turn itself.
            stop=["User:", "\nUser", "System:"],
        )
        return output["choices"][0]["text"].strip()
    except Exception as e:
        return f"Error during inference: {e}"
def chat(message, history, system_prompt, temperature, top_p):
    """Adapter for gr.ChatInterface: forward all inputs to generate_response."""
    reply = generate_response(
        message,
        history,
        system_prompt=system_prompt,
        temperature=temperature,
        top_p=top_p,
    )
    return reply
# Build the chat UI. additional_inputs are rendered below the chat box and
# passed to chat() after (message, history), in the order listed here.
iface = gr.ChatInterface(
    fn=chat,
    title="llama.cpp Chat",
    # Fix: typo in the user-facing description ("arent" -> "aren't").
    description="Test a GGUF model. Chats aren't persistent",
    additional_inputs=[
        gr.Textbox(label="System Prompt", value=DEFAULT_SYSTEM_PROMPT, lines=3),
        gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1),
        gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.9, step=0.1),
    ],
    cache_examples=False,
)
iface.launch()