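# Gradio chat UI for Deepseek-R1-0528-Qwen3-8B, served by a local llama.cpp
# server and accessed through its OpenAI-compatible endpoint.
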
import subprocess

import gradio as gr
from openai import OpenAI
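
# Start the local llama.cpp server in the background (start.sh is assumed to
# launch it); the app below talks to it on port 8000.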
subprocess.Popen("bash /home/user/app/start.sh", shell=True)
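# OpenAI-compatible client pointed at the local server; the api_key is a
# placeholder and the long timeout allows for slow generations.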
client = OpenAI(base_url="http://0.0.0.0:8000/v1", api_key="sk-local", timeout=600)


def respond(
    message,
    history: list[tuple[str, str]],
    system_message=None,
    max_tokens=None,
    temperature=0.7,
    top_p=0.95,  # the call below referenced top_p, which was never defined; 0.95 is an assumed default
):
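    # Rebuild the full conversation in OpenAI chat format from Gradio's
    # (user, assistant) history tuples.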
    messages = []
    if system_message:
        messages = [{"role": "system", "content": system_message}]
    for user, assistant in history:
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})
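    # Advertise a browser_search function tool and a code interpreter so the
    # model can emit tool calls; whether they are executed depends on the
    # server-side configuration.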
    try:
        stream = client.chat.completions.create(
            model="Deepseek-R1-0528-Qwen3-8B",
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True,
            tools=[
                {
                    "type": "function",
                    "function": {
                        "name": "browser_search",
                        "description": (
                            "Search the web for a given query and return the most relevant results."
                        ),
                        "parameters": {
                            "type": "object",
                            "properties": {
                                "query": {
                                    "type": "string",
                                    "description": "The search query string.",
                                },
                                "max_results": {
                                    "type": "integer",
                                    "description": (
                                        "Maximum number of search results to return. "
                                        "If omitted the service will use its default."
                                    ),
                                    "default": 5,
                                },
                            },
                            "required": ["query"],
                        },
                    },
                },
                {"type": "code_interpreter"},
            ],
        )
        print("messages", messages)
        output = ""
        for chunk in stream:
            delta = chunk.choices[0].delta
            # DeepSeek-R1 streams its chain of thought in `reasoning_content`;
            # fall back to the regular `content` field once reasoning ends,
            # rather than relying on a bare except to catch the None/missing case.
            output += getattr(delta, "reasoning_content", None) or delta.content or ""
            yield output
    except Exception as e:
        print(f"[Error] {e}")
        yield "⚠️ Llama.cpp server error"


demo = gr.ChatInterface(respond)

if __name__ == "__main__":
    demo.launch(show_api=False)