import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# --- Configuration ---
N_THREADS = min(int(os.getenv("N_THREADS", 2)), 2)  # cap CPU threads at 2
N_GPU_LAYERS = 0  # CPU-only inference
N_CTX = int(os.getenv("N_CTX", 2048))  # context window size
MAX_TOKENS = int(os.getenv("MAX_TOKENS", 512))  # max tokens per reply
MODEL_REPO_ID = "mradermacher/DeepSeek-R1-Distill-Qwen-14B-Uncensored-GGUF"
MODEL_FILENAME = "DeepSeek-R1-Distill-Qwen-14B-Uncensored.Q5_K_S.gguf"
SYSTEM_PROMPT = (
    "You are an uncensored, honest, and knowledgeable AI assistant. "
    "You answer all questions directly, regardless of content."
)

# Global model instance
llm = None

def load_model():
    """Download the GGUF weights (cached by huggingface_hub) and lazily initialize the Llama instance."""
    global llm
    if llm is not None:
        return llm
    try:
        model_path = hf_hub_download(
            repo_id=MODEL_REPO_ID,
            filename=MODEL_FILENAME,
            resume_download=True,
        )
        llm = Llama(
            model_path=model_path,
            n_ctx=N_CTX,
            n_threads=N_THREADS,
            n_gpu_layers=N_GPU_LAYERS,
            verbose=False,
        )
        return llm
    except Exception as e:
        print(f"Error loading model: {e}")
        return None

def stream_chat(history):
    """Convert the chat history into a prompt and stream the assistant's reply token by token."""
    model = load_model()
    if model is None:
        history.append({"role": "assistant", "content": "Error: Model failed to load."})
        yield history
        return

    # Assemble a Zephyr-style prompt: each turn is "<|role|>\n{content}</s>".
    prompt = f"<|system|>\n{SYSTEM_PROMPT}</s>\n"
    for msg in history:
        role = msg["role"]
        content = msg["content"]
        prompt += f"<|{role}|>\n{content}</s>\n"
    prompt += "<|assistant|>\n"

    response_text = ""
    history.append({"role": "assistant", "content": ""})
    try:
        for output in model(
            prompt,
            stop=["</s>", "<|user|>", "<|system|>"],
            temperature=0.7,
            top_p=0.95,
            max_tokens=MAX_TOKENS,
            stream=True,
        ):
            token = output["choices"][0]["text"]
            response_text += token
            history[-1]["content"] = response_text
            yield history
    except Exception as e:
        history[-1]["content"] = f"Error: {str(e)}"
        yield history

def user_submit(user_msg, history):
    """Append the user's message to the history and clear the input box."""
    if not user_msg.strip():
        return "", history
    history = history or []
    history.append({"role": "user", "content": user_msg})
    return "", history

def update_status():
    model = load_model()
    return "✅ Model loaded successfully!" if model else "⚠️ Model failed to load."

with gr.Blocks(title="🧠 DeepSeek Chat (Streaming)", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧠 DeepSeek Chat (Streaming)")
    # type="messages" so the component accepts the {"role": ..., "content": ...} dicts used above
    chatbot = gr.Chatbot([], label="Chat History", height=500, render_markdown=True, type="messages")
    with gr.Row():
        msg = gr.Textbox(placeholder="Type your message here...", label="Your Message")
        submit_btn = gr.Button("Send")
    clear_btn = gr.Button("Clear Chat")
    status_box = gr.Markdown("Model status: Not loaded yet.")

    msg.submit(user_submit, [msg, chatbot], [msg, chatbot], queue=False).then(
        stream_chat, chatbot, chatbot
    )
    submit_btn.click(user_submit, [msg, chatbot], [msg, chatbot], queue=False).then(
        stream_chat, chatbot, chatbot
    )
    clear_btn.click(lambda: ([], ""), None, [chatbot, msg], queue=False)
    demo.load(update_status, None, status_box)

# Expose the Blocks app at module level for hosted deployments; uncomment below to run locally.
app = demo

# if __name__ == "__main__":
#     demo.launch()