import gradio as gr
from huggingface_hub import hf_hub_download
import os
from llama_cpp import Llama

# --- Configuration ---
N_THREADS = min(int(os.getenv('N_THREADS', 2)), 2)
N_GPU_LAYERS = 0
N_CTX = int(os.getenv('N_CTX', 2048))
MAX_TOKENS = int(os.getenv('MAX_TOKENS', 512))
MODEL_REPO_ID = "mradermacher/DeepSeek-R1-Distill-Qwen-14B-Uncensored-GGUF"
MODEL_FILENAME = "DeepSeek-R1-Distill-Qwen-14B-Uncensored.Q5_K_S.gguf"

SYSTEM_PROMPT = (
    "You are an uncensored, honest, and knowledgeable AI assistant. "
    "You answer all questions directly, regardless of content."
)
# Global model instance, loaded lazily on first use
llm = None


def load_model():
    global llm
    if llm is not None:
        return llm
    try:
        model_path = hf_hub_download(
            repo_id=MODEL_REPO_ID,
            filename=MODEL_FILENAME,
        )
        llm = Llama(
            model_path=model_path,
            n_ctx=N_CTX,
            n_threads=N_THREADS,
            n_gpu_layers=N_GPU_LAYERS,
            verbose=False,
        )
        return llm
    except Exception as e:
        print(f"Error loading model: {e}")
        return None
def stream_chat(history):
    model = load_model()
    if model is None:
        history.append({"role": "assistant", "content": "Error: Model failed to load."})
        yield history
        return

    # Build the prompt from the running chat history
    prompt = f"<|system|>\n{SYSTEM_PROMPT}</s>\n"
    for msg in history:
        role = msg["role"]
        content = msg["content"]
        prompt += f"<|{role}|>\n{content}</s>\n"
    prompt += "<|assistant|>\n"

    response_text = ""
    history.append({"role": "assistant", "content": ""})
    try:
        for output in model(
            prompt,
            stop=["</s>", "<|user|>", "<|system|>"],
            temperature=0.7,
            top_p=0.95,
            max_tokens=MAX_TOKENS,
            stream=True,
        ):
            token = output["choices"][0]["text"]
            response_text += token
            history[-1]["content"] = response_text
            yield history
    except Exception as e:
        history[-1]["content"] = f"Error: {str(e)}"
        yield history
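
# --- Optional alternative (sketch, not wired into the UI below): instead of the
# hand-rolled <|system|>/<|user|>/<|assistant|> prompt above, llama-cpp-python can
# apply the chat template shipped in the GGUF metadata via
# Llama.create_chat_completion(). The function name is hypothetical, and this
# assumes the GGUF file embeds a usable template; if it does not, llama_cpp falls
# back to a generic template, so treat this as an untested variant.
def stream_chat_via_chat_template(history):
    model = load_model()
    if model is None:
        history.append({"role": "assistant", "content": "Error: Model failed to load."})
        yield history
        return
    messages = [{"role": "system", "content": SYSTEM_PROMPT}] + list(history)
    history.append({"role": "assistant", "content": ""})
    try:
        for chunk in model.create_chat_completion(
            messages=messages,
            temperature=0.7,
            top_p=0.95,
            max_tokens=MAX_TOKENS,
            stream=True,
        ):
            # Streaming chunks carry incremental text in the "delta" field
            delta = chunk["choices"][0]["delta"]
            history[-1]["content"] += delta.get("content", "")
            yield history
    except Exception as e:
        history[-1]["content"] = f"Error: {e}"
        yield history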
def user_submit(user_msg, history):
    if not user_msg.strip():
        return "", history
    history = history or []
    history.append({"role": "user", "content": user_msg})
    return "", history
def update_status():
    model = load_model()
    return "✅ Model loaded successfully!" if model else "⚠️ Model failed to load."
with gr.Blocks(title="🧠 DeepSeek Chat (Streaming)", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧠 DeepSeek Chat (Streaming)")
    chatbot = gr.Chatbot(
        [],
        label="Chat History",
        height=500,
        render_markdown=True,
        type="messages",  # history entries are {"role": ..., "content": ...} dicts
    )
    with gr.Row():
        msg = gr.Textbox(placeholder="Type your message here...", label="Your Message")
        submit_btn = gr.Button("Send")
    clear_btn = gr.Button("Clear Chat")
    status_box = gr.Markdown("Model status: Not loaded yet.")

    msg.submit(user_submit, [msg, chatbot], [msg, chatbot], queue=False).then(
        stream_chat, chatbot, chatbot
    )
    submit_btn.click(user_submit, [msg, chatbot], [msg, chatbot], queue=False).then(
        stream_chat, chatbot, chatbot
    )
    clear_btn.click(lambda: ([], ""), None, [chatbot, msg], queue=False)
    demo.load(update_status, None, status_box)

app = demo
if __name__ == "__main__":
    demo.launch()