import subprocess

import gradio as gr
from openai import OpenAI


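# Launch the llama.cpp server in the background; start.sh is expected to
# expose an OpenAI-compatible API on port 8000.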
subprocess.Popen("bash /home/user/app/start.sh", shell=True)

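# Point the OpenAI client at the local llama.cpp server; the API key is a
# dummy value and the long timeout allows for slow local generation.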
client = OpenAI(base_url="http://0.0.0.0:8000/v1", api_key="sk-local", timeout=600)


def respond(
    message,
    history: list[tuple[str, str]],
    system_message=None,
    max_tokens=None,
    temperature=0.7,
    top_p=0.95,  # was used below but never defined; 0.95 is an assumed default
):
    messages = []
    if system_message:
        messages = [{"role": "system", "content": system_message}]

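    # Rebuild the conversation from Gradio's (user, assistant) history tuples.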
    for user, assistant in history:
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})

    messages.append({"role": "user", "content": message})

    try:
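        # Stream the completion so partial output can be yielded as it arrives.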
        stream = client.chat.completions.create(
            model="Deepseek-R1-0528-Qwen3-8B",
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True,
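            # Advertise tools to the model; note that this demo only streams
            # text and does not execute any tool calls the model emits.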
            tools=[
                {
                    "type": "function",
                    "function": {
                        "name": "browser_search",
                        "description": (
                            "Search the web for a given query and return the most relevant results."
                        ),
                        "parameters": {
                            "type": "object",
                            "properties": {
                                "query": {
                                    "type": "string",
                                    "description": "The search query string.",
                                },
                                "max_results": {
                                    "type": "integer",
                                    "description": (
                                        "Maximum number of search results to return. "
                                        "If omitted the service will use its default."
                                    ),
                                    "default": 5,
                                },
                            },
                            "required": ["query"],
                        },
                    },
                },
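                # "code_interpreter" is an Assistants API tool type rather than
                # a Chat Completions one; the local server may ignore or reject it.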
                {"type": "code_interpreter"},
            ],
        )

        print("messages", messages)
        output = ""
        for chunk in stream:
            delta = chunk.choices[0].delta

            try:
                output += delta.reasoning_content
            except:
                output += delta.content or ""

            yield output

    except Exception as e:
        print(f"[Error] {e}")
        yield "⚠️ Llama.cpp server error"


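# Only message and history are passed by default; the remaining respond()
# parameters keep their defaults unless exposed via additional_inputs.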
demo = gr.ChatInterface(respond)

if __name__ == "__main__":
    demo.launch(show_api=False)