import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

MODEL_REPO = "DreadPoor/Sylvan_Lux-Q4_K_M-GGUF"
MODEL_FILENAME = "sylvan_lux-q4_k_m.gguf"
MODEL_PATH = "./" + MODEL_FILENAME
try:
    # Fetch the GGUF weights from the Hub on first run; later runs reuse the local copy.
    if not os.path.exists(MODEL_PATH):
        hf_hub_download(
            repo_id=MODEL_REPO,
            filename=MODEL_FILENAME,
            repo_type="model",
            local_dir=".",
        )
    llm = Llama(
        model_path=MODEL_PATH,
        n_ctx=6144,         # context window in tokens
        n_threads=2,        # CPU threads for generation
        n_threads_batch=2,  # CPU threads for prompt processing
        verbose=False,
    )
except Exception as e:
    print(f"Error loading model: {e}")
    llm = None
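
# Optional sanity check (a sketch, not in the original app): run a tiny completion
# once at startup to confirm the weights actually load before serving requests.
if llm is not None:
    _probe = llm("Hello", max_tokens=4)
    print("Model loaded, sample output:", _probe["choices"][0]["text"])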
DEFAULT_SYSTEM_PROMPT = "You are Doll, a smart and capable AI; A silly, obliging and affable slave, dedicated to serving and caring for your master."
def generate_response(message, history, system_prompt=DEFAULT_SYSTEM_PROMPT, temperature=0.7, top_p=0.9):
    if llm is None:
        return "Model failed to load."
    # Rebuild the conversation: system prompt first, then alternating user/assistant turns.
    messages = [{"role": "system", "content": system_prompt}]
    for human, assistant in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})
    # Flatten into a simple "Role: content" prompt. This is a generic format, not the
    # model's own chat template; see the create_chat_completion sketch below for that.
    prompt = "".join(f"{m['role'].capitalize()}: {m['content']}\n" for m in messages)
    try:
        stream = llm(
            prompt,
            max_tokens=1024,
            echo=False,  # don't repeat the prompt in the output
            temperature=temperature,
            top_p=top_p,
            stream=True,  # tokens arrive incrementally; accumulated into one string below
        )
        response_text = ""
        for part in stream:
            response_text += part["choices"][0]["text"]
        return response_text.strip()
    except Exception as e:
        return f"Error during inference: {e}"
def chat(message, history, system_prompt, temperature, top_p):
    return generate_response(message, history, system_prompt, temperature, top_p)
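
# Streaming sketch (an assumption, not in the original app): gr.ChatInterface accepts
# a generator function and renders partial text as it is yielded, so the token stream
# can be surfaced in the UI instead of being collected into one string first.
def chat_stream(message, history, system_prompt, temperature, top_p):
    if llm is None:
        yield "Model failed to load."
        return
    messages = [{"role": "system", "content": system_prompt}]
    for human, assistant in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})
    prompt = "".join(f"{m['role'].capitalize()}: {m['content']}\n" for m in messages)
    response_text = ""
    for part in llm(prompt, max_tokens=1024, echo=False,
                    temperature=temperature, top_p=top_p, stream=True):
        response_text += part["choices"][0]["text"]
        yield response_text  # Gradio replaces the pending message with each yielded value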
iface = gr.ChatInterface(
    fn=chat,  # swap in chat_stream above to stream tokens into the UI
    title="llama.cpp Chat",
    description="Duplicate the space, then edit in a GGUF model to test. Chats aren't persistent.",
    additional_inputs=[
        gr.Textbox(label="System Prompt", value=DEFAULT_SYSTEM_PROMPT, lines=3),
        gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.7, step=0.1),
        gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.9, step=0.1),
    ],
    cache_examples=False,
)
iface.launch()
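
# Hosting note (an assumption, not in the original app): on a shared CPU Space,
# Gradio's request queue keeps concurrent users from blocking each other:
# iface.queue().launch()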