yzhuang committed
Commit 25fd9cd · verified · 1 Parent(s): 524243a

Update app.py

Files changed (1)
  1. app.py +10 -13
app.py CHANGED
@@ -21,8 +21,13 @@ import gradio as gr
 # a dummy value when none is provided. The *base_url* points to the local
 # vLLM server that speaks the OpenAI REST dialect.
 # -----------------------------------------------------------------------------
-openai.api_key = os.getenv("OPENAI_API_KEY", "EMPTY")
-openai.base_url = "http://0.0.0.0:8000/v1"
+openai_api_key = "EMPTY"
+openai_api_base = "http://0.0.0.0:8000/v1"
+
+client = OpenAI(
+    api_key=openai_api_key,
+    base_url=openai_api_base,
+)
 
 # ──────────────────────────────────────────────────────────────────────────────
 # Chat handler
@@ -57,24 +62,16 @@ def stream_completion(message: str,
 
     #try:
     # Kick off streaming completion
-    response = openai.chat.completions.create(
+    response = client.chat.completions.create(
         model="Qwen/Qwen3-4B",
         messages=messages,
         temperature=temperature,
         top_p=top_p,
         max_tokens=max_tokens,
-        stream=True,
     )
 
-    assistant = ""
-    for chunk in response:
-        # ``delta.content`` is None for e.g. role announcements; guard with or ""
-        delta = chunk.choices[0].delta.content or ""
-        assistant += delta
-        yield history + [(message, assistant)]  # live update
-
-    # except Exception as err:  # pylint: disable=broad-except
-    #     yield history + [(message, f"[ERROR] {err}")]
+    assistant = response.choices[0].message
+    yield history + [(message, assistant)]  # live update
 
 
 # ──────────────────────────────────────────────────────────────────────────────
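The commit swaps the module-level configuration (`openai.api_key` / `openai.base_url`) for an explicit `OpenAI` client and replaces the streamed completion with a single blocking call. Below is a minimal self-contained sketch of the new flow, assuming a vLLM OpenAI-compatible server is already serving Qwen/Qwen3-4B at http://0.0.0.0:8000/v1; the sampling values are placeholders for the app's slider inputs.

    from openai import OpenAI

    # vLLM's OpenAI-compatible server does not validate the key, but the
    # client requires a non-empty value, hence the "EMPTY" placeholder.
    client = OpenAI(api_key="EMPTY", base_url="http://0.0.0.0:8000/v1")

    messages = [{"role": "user", "content": "Hello!"}]

    response = client.chat.completions.create(
        model="Qwen/Qwen3-4B",
        messages=messages,
        temperature=0.7,   # placeholder; app.py passes its UI values here
        top_p=0.95,        # placeholder
        max_tokens=256,    # placeholder
    )

    # choices[0].message is a ChatCompletionMessage object; its .content
    # attribute holds the generated text as a plain string.
    print(response.choices[0].message.content)

Worth noting: in the openai v1 SDK, `response.choices[0].message` is a message object rather than a string, so the plain-text reply lives in its `.content` attribute.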
 
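For reference, the streaming pattern this commit removes reads the completion chunk by chunk so the chat window can update as tokens arrive. A sketch of that generator style under the same assumptions (`stream_reply` is a hypothetical helper, not part of app.py):

    def stream_reply(client, history, message, messages):
        """Yield the chat history with the assistant reply growing as chunks arrive."""
        stream = client.chat.completions.create(
            model="Qwen/Qwen3-4B",
            messages=messages,
            stream=True,  # ask the server for incremental deltas
        )
        assistant = ""
        for chunk in stream:
            # delta.content is None for e.g. role announcements; guard with `or ""`.
            assistant += chunk.choices[0].delta.content or ""
            yield history + [(message, assistant)]  # live update per chunk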