import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
MODEL_REPO = "mradermacher/Irix-12B-Model_Stock-i1-GGUF"
MODEL_FILENAME = "Irix-12B-Model_Stock.i1-Q4_K_M.gguf"
MODEL_PATH = "./" + MODEL_FILENAME
try:
    # Download the GGUF weights on first run; skip if already present locally.
    if not os.path.exists(MODEL_PATH):
        hf_hub_download(
            repo_id=MODEL_REPO,
            filename=MODEL_FILENAME,
            repo_type="model",
            local_dir=".",
        )
    # Load the model on CPU with a modest context window and thread count.
    llm = Llama(
        model_path=MODEL_PATH,
        n_ctx=4096,
        n_threads=2,
        n_threads_batch=2,
        verbose=False,
    )
except Exception as e:
    print(f"Error loading model: {e}")
    llm = None
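# Hedged note: with a CUDA/Metal build of llama-cpp-python, layers can be
# offloaded to the GPU via Llama(..., n_gpu_layers=-1); the settings above
# assume CPU-only hardware, which is the typical free Spaces tier.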
DEFAULT_SYSTEM_PROMPT = "You are Doll, a smart and diligent AI; Doll is a silly, obliging and affable servant, dedicated to serving and caring for your owner."
def generate_response(message, history, system_prompt=DEFAULT_SYSTEM_PROMPT, temperature=0.8, top_p=0.9):
    if llm is None:
        return "Model failed to load."
    # Rebuild the conversation as a list of role/content messages.
    messages = [{"role": "system", "content": system_prompt}]
    for human, assistant in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})
    # Flatten into a plain "Role: content" transcript for raw completion.
    prompt = "".join(f"{m['role'].capitalize()}: {m['content']}\n" for m in messages)
    try:
        output = llm(
            prompt,
            max_tokens=1024,
            echo=False,
            temperature=temperature,
            top_p=top_p,
            # Stop before the model starts writing the next "User:" turn itself.
            stop=["\nUser:"],
        )
        return output["choices"][0]["text"].strip()
    except Exception as e:
        return f"Error during inference: {e}"
def chat(message, history, system_prompt, temperature, top_p):
    return generate_response(message, history, system_prompt, temperature, top_p)
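# Streaming sketch (also unused): gr.ChatInterface accepts a generator
# function, and llama-cpp-python yields partial completions with stream=True.
# This is an assumption-laden variant, not the original app's behavior; swap
# fn=chat for fn=chat_stream below to try it.
def chat_stream(message, history, system_prompt, temperature, top_p):
    if llm is None:
        yield "Model failed to load."
        return
    messages = [{"role": "system", "content": system_prompt}]
    for human, assistant in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})
    prompt = "".join(f"{m['role'].capitalize()}: {m['content']}\n" for m in messages)
    partial = ""
    for chunk in llm(prompt, max_tokens=1024, echo=False, temperature=temperature,
                     top_p=top_p, stop=["\nUser:"], stream=True):
        partial += chunk["choices"][0]["text"]
        yield partial  # Gradio re-renders the assistant message on each yield.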
iface = gr.ChatInterface(
    fn=chat,
    title="llama.cpp Chat",
    description="Test a GGUF model. Chats aren't persistent.",
    additional_inputs=[
        gr.Textbox(label="System Prompt", value=DEFAULT_SYSTEM_PROMPT, lines=3),
        gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1),
        gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.9, step=0.1),
    ],
    cache_examples=False,
)
iface.launch()
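# When running locally rather than on Spaces, explicit bind options may be
# useful, e.g. iface.launch(server_name="0.0.0.0", server_port=7860).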