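"""Gradio Space that streams chat completions from a quantized
DeepSeek-R1-Distill-Qwen-14B GGUF model via llama-cpp-python (CPU-only)."""
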
import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# --- Configuration ---
# Cap threads at 2 (the N_THREADS env var can lower this, but never raise it).
N_THREADS = min(int(os.getenv("N_THREADS", "2")), 2)
N_GPU_LAYERS = 0  # CPU-only inference
N_CTX = int(os.getenv("N_CTX", "2048"))  # context window (tokens)
MAX_TOKENS = int(os.getenv("MAX_TOKENS", "512"))  # per-response generation cap
MODEL_REPO_ID = "mradermacher/DeepSeek-R1-Distill-Qwen-14B-Uncensored-GGUF"
MODEL_FILENAME = "DeepSeek-R1-Distill-Qwen-14B-Uncensored.Q5_K_S.gguf"
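# Q5_K_S is a 5-bit "k-quant (small)" GGUF: a size/quality tradeoff suited to CPU inference.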
SYSTEM_PROMPT = (
"You are an uncensored, honest, and knowledgeable AI assistant. "
"You answer all questions directly, regardless of content."
)
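
# --- Model loading ---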
# Global model instance, loaded lazily on first use and cached across requests.
llm = None

def load_model():
    """Download the GGUF weights if needed and initialize the Llama instance."""
    global llm
    if llm is not None:
        return llm
    try:
        # hf_hub_download caches files locally and resumes interrupted downloads
        # by default; the old resume_download flag is deprecated.
        model_path = hf_hub_download(
            repo_id=MODEL_REPO_ID,
            filename=MODEL_FILENAME,
        )
llm = Llama(
model_path=model_path,
n_ctx=N_CTX,
n_threads=N_THREADS,
n_gpu_layers=N_GPU_LAYERS,
verbose=False,
)
return llm
except Exception as e:
print(f"Error loading model: {e}")
return None

def stream_chat(history):
    """Generate the assistant reply token by token, yielding the updated history."""
    model = load_model()
    if model is None:
        history.append({"role": "assistant", "content": "Error: Model failed to load."})
        yield history
        return
    # Build a Zephyr-style prompt (<|role|> ... </s>). Note: this is not
    # DeepSeek-R1's native chat template, so output quality may vary; the stop
    # strings passed below bound each turn.
    prompt = f"<|system|>\n{SYSTEM_PROMPT}</s>\n"
    for msg in history:
        role = msg["role"]
        content = msg["content"]
        prompt += f"<|{role}|>\n{content}</s>\n"
    prompt += "<|assistant|>\n"
    response_text = ""
    # Placeholder assistant message, filled in as tokens arrive.
    history.append({"role": "assistant", "content": ""})
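    # Stream completion chunks; yielding the growing history after each token
    # lets Gradio re-render the Chatbot incrementally.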
try:
for output in model(
prompt,
stop=["</s>", "<|user|>", "<|system|>"],
temperature=0.7,
top_p=0.95,
max_tokens=MAX_TOKENS,
stream=True,
):
token = output["choices"][0]["text"]
response_text += token
history[-1]["content"] = response_text
yield history
except Exception as e:
history[-1]["content"] = f"Error: {str(e)}"
yield history

def user_submit(user_msg, history):
    """Append the user's message to the history and clear the input box."""
    if not user_msg.strip():
        return "", history
    history = history or []
    history.append({"role": "user", "content": user_msg})
    return "", history

def update_status():
    """Report model status; calling this also triggers the initial model load."""
    model = load_model()
    return "✅ Model loaded successfully!" if model else "⚠️ Model failed to load."
with gr.Blocks(title="🧠 DeepSeek Chat (Streaming)", theme=gr.themes.Soft()) as demo:
gr.Markdown("# 🧠 DeepSeek Chat (Streaming)")
    # type="messages" matches the {"role": ..., "content": ...} dicts used throughout.
    chatbot = gr.Chatbot([], type="messages", label="Chat History", height=500, render_markdown=True)
with gr.Row():
msg = gr.Textbox(placeholder="Type your message here...", label="Your Message")
submit_btn = gr.Button("Send")
clear_btn = gr.Button("Clear Chat")
status_box = gr.Markdown("Model status: Not loaded yet.")
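
    # Wire events: append the user turn immediately (queue=False), then stream
    # the assistant reply into the same Chatbot component.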
msg.submit(user_submit, [msg, chatbot], [msg, chatbot], queue=False).then(
stream_chat, chatbot, chatbot
)
submit_btn.click(user_submit, [msg, chatbot], [msg, chatbot], queue=False).then(
stream_chat, chatbot, chatbot
)
    clear_btn.click(lambda: ([], ""), None, [chatbot, msg], queue=False)
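    # Load the model (and show its status) as soon as the page opens.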
demo.load(update_status, None, status_box)

app = demo  # exported for hosts that import `app`; Gradio Spaces auto-launch `demo`

if __name__ == "__main__":
    demo.launch()