Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -21,8 +21,13 @@ import gradio as gr
|
|
21 |
# a dummy value when none is provided. The *base_url* points to the local
|
22 |
# vLLM server that speaks the OpenAI REST dialect.
|
23 |
# -----------------------------------------------------------------------------
|
24 |
-
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
# ──────────────────────────────────────────────────────────────────────────────
|
28 |
# Chat handler
|
@@ -57,24 +62,16 @@ def stream_completion(message: str,
|
|
57 |
|
58 |
#try:
|
59 |
# Kick off streaming completion
|
60 |
-
response =
|
61 |
model="Qwen/Qwen3-4B",
|
62 |
messages=messages,
|
63 |
temperature=temperature,
|
64 |
top_p=top_p,
|
65 |
max_tokens=max_tokens,
|
66 |
-
stream=True,
|
67 |
)
|
68 |
|
69 |
-
assistant =
|
70 |
-
|
71 |
-
# ``delta.content`` is None for e.g. role announcements; guard with or ""
|
72 |
-
delta = chunk.choices[0].delta.content or ""
|
73 |
-
assistant += delta
|
74 |
-
yield history + [(message, assistant)] # live update
|
75 |
-
|
76 |
-
# except Exception as err: # pylint: disable=broad-except
|
77 |
-
# yield history + [(message, f"[ERROR] {err}")]
|
78 |
|
79 |
|
80 |
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
21 |
# a dummy value when none is provided. The *base_url* points to the local
|
22 |
# vLLM server that speaks the OpenAI REST dialect.
|
23 |
# -----------------------------------------------------------------------------
|
24 |
+
openai_api_key = "EMPTY"
|
25 |
+
openai_api_base = "http://0.0.0.0:8000/v1"
|
26 |
+
|
27 |
+
client = OpenAI(
|
28 |
+
api_key=openai_api_key,
|
29 |
+
base_url=openai_api_base,
|
30 |
+
)
|
31 |
|
32 |
# ──────────────────────────────────────────────────────────────────────────────
|
33 |
# Chat handler
|
|
|
62 |
|
63 |
#try:
|
64 |
# Kick off streaming completion
|
65 |
+
response = client.chat.completions.create(
|
66 |
model="Qwen/Qwen3-4B",
|
67 |
messages=messages,
|
68 |
temperature=temperature,
|
69 |
top_p=top_p,
|
70 |
max_tokens=max_tokens,
|
|
|
71 |
)
|
72 |
|
73 |
+
assistant = response.choices[0].message
|
74 |
+
yield history + [(message, assistant)] # live update
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
|
76 |
|
77 |
# ──────────────────────────────────────────────────────────────────────────────
|