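# Minimal llama.cpp chat Space: on first run it downloads a GGUF quant from
# the Hugging Face Hub, loads it with llama-cpp-python, and serves it through
# gr.ChatInterface.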
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os
MODEL_REPO = "DreadPoor/Sylvan_Lux-Q4_K_M-GGUF"
MODEL_FILENAME = "sylvan_lux-q4_k_m.gguf"
MODEL_PATH = "./" + MODEL_FILENAME
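# hf_hub_download writes the file into local_dir, so MODEL_PATH resolves to
# the same "./<filename>" location checked below.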
try:
    if not os.path.exists(MODEL_PATH):  # skip the download if the file is already cached
        hf_hub_download(
            repo_id=MODEL_REPO,
            filename=MODEL_FILENAME,
            repo_type="model",
            local_dir=".",
        )
    llm = Llama(
        model_path=MODEL_PATH,
        n_ctx=6144,          # context window, in tokens
        n_threads=2,         # generation threads (sized for a small CPU Space)
        n_threads_batch=2,   # prompt-processing threads
        verbose=False,
    )
except Exception as e:
    print(f"Error loading model: {e}")
    llm = None  # generate_response reports the failure instead of crashing
DEFAULT_SYSTEM_PROMPT = "You are Doll, a smart and capable AI; a silly, obliging and affable slave, dedicated to serving and caring for your master."
def generate_response(message, history, system_prompt=DEFAULT_SYSTEM_PROMPT, temperature=0.7, top_p=0.9):
    if llm is None:
        yield "Model failed to load."
        return
    # Rebuild the full conversation: system prompt, prior turns, new message.
    messages = [{"role": "system", "content": system_prompt}]
    for human, assistant in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})
    prompt = "".join(f"{m['role'].capitalize()}: {m['content']}\n" for m in messages)
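    # The line above flattens the chat into plain "Role: content" text. A
    # template-aware sketch, assuming the GGUF's metadata carries a chat
    # template that llama-cpp-python can read:
    #   stream = llm.create_chat_completion(
    #       messages=messages, max_tokens=1024,
    #       temperature=temperature, top_p=top_p, stream=True,
    #   )
    # (streamed chunks then carry text under choices[0]["delta"]["content"])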
    try:
        stream = llm(
            prompt,
            max_tokens=1024,
            echo=False,  # don't repeat the prompt in the output
            temperature=temperature,
            top_p=top_p,
            stream=True,  # yield completion chunks as they are generated
        )
        # Accumulate chunks and yield the running text so the UI updates live.
        response_text = ""
        for part in stream:
            response_text += part["choices"][0]["text"]
            yield response_text.strip()
    except Exception as e:
        yield f"Error during inference: {e}"
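# gr.ChatInterface streams partial replies when its fn is a generator, so the
# wrapper re-yields instead of returning a generator object.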
def chat(message, history, system_prompt, temperature, top_p):
    yield from generate_response(message, history, system_prompt, temperature, top_p)
iface = gr.ChatInterface(
    fn=chat,
    title="llama.cpp Chat",
    description="Duplicate the Space, then edit in a GGUF model to test. Chats aren't persistent.",
    additional_inputs=[
        gr.Textbox(label="System Prompt", value=DEFAULT_SYSTEM_PROMPT, lines=3),
        gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1),
        gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.9, step=0.1),
    ],
    cache_examples=False,
)
iface.launch()