yzhuang committed on
Commit
39209e4
·
verified ·
1 Parent(s): 3d038b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -41
app.py CHANGED
@@ -1,25 +1,16 @@
1
  # app.py
2
- import json
3
- import requests, threading
4
- import sseclient
5
- import gradio as gr
6
- import server
7
 
8
  API_URL = "http://0.0.0.0:8000/v1/chat/completions"
9
 
10
-
11
  def stream_completion(message, history, max_tokens, temperature, top_p, beta):
12
- """
13
- Gradio callback: takes the newest user message + full chat history,
14
- returns an updated history while streaming assistant tokens.
15
- """
16
- # ------- build OpenAI-style message list (no system prompt) -------------
17
- messages = []
18
- for usr, bot in history:
19
- if usr:
20
- messages.append({"role": "user", "content": usr})
21
- if bot:
22
- messages.append({"role": "assistant", "content": bot})
23
  messages.append({"role": "user", "content": message})
24
 
25
  payload = {
@@ -36,44 +27,52 @@ def stream_completion(message, history, max_tokens, temperature, top_p, beta):
36
  }
37
 
38
  try:
39
- resp = requests.post(API_URL, json=payload, stream=True, headers=headers, timeout=60)
40
- resp.raise_for_status()
41
- client = sseclient.SSEClient(resp)
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
- assistant = ""
44
- for event in client.events():
45
- if event.data.strip() == "[DONE]":
46
- break
47
- delta = json.loads(event.data)["choices"][0]["delta"].get("content", "")
48
- assistant += delta
49
- yield history + [(message, assistant)] # update the chat box live
50
 
 
 
 
51
  except Exception as err:
52
  yield history + [(message, f"[ERROR] {err}")]
53
 
54
-
55
- # ----------------------- UI ---------------------------------------------
56
  with gr.Blocks(title="🎨 Mixture of Inputs (MoI) Demo") as demo:
57
  gr.Markdown(
58
  "## 🎨 Mixture of Inputs (MoI) Demo \n"
59
- "Streaming vLLM demo with dynamic **beta** adjustment in MoI, higher beta means less blending."
 
60
  )
61
 
62
- # sliders first – all on one row
63
- with gr.Row():
64
- beta = gr.Slider(0.0, 10.0, value=1.0, step=0.1, label="MoI Beta")
65
- temperature = gr.Slider(0.1, 1.0, value=0.6, step=0.1, label="Temperature")
66
- top_p = gr.Slider(0.1, 1.0, value=0.80, step=0.05, label="Top-p")
67
- max_tokens = gr.Slider(1, 2048, value=512, step=1, label="Max new tokens")
68
-
69
 
70
- chatbot = gr.Chatbot(height=450)
71
- user_box = gr.Textbox(placeholder="Type a message and press Enter…", show_label=False)
72
  clear_btn = gr.Button("Clear chat")
73
 
74
- # wiring
75
  user_box.submit(
76
- stream_completion,
77
  inputs=[user_box, chatbot, max_tokens, temperature, top_p, beta],
78
  outputs=chatbot,
79
  )
 
1
  # app.py
2
+ import json, requests, gradio as gr
 
 
 
 
3
 
4
  API_URL = "http://0.0.0.0:8000/v1/chat/completions"
5
 
 
6
def stream_completion(message, history, max_tokens, temperature, top_p, beta):
    """Gradio callback: stream the assistant's reply token-by-token.

    Parameters
    ----------
    message : str
        The newest user message.
    history : list[tuple[str, str]]
        Prior (user, assistant) turn pairs from the Chatbot component.
    max_tokens, temperature, top_p, beta :
        Sampling controls forwarded to the vLLM server; ``beta`` is the
        Mixture-of-Inputs blending strength.

    Yields
    ------
    list[tuple[str, str]]
        The updated chat history, re-emitted as each token arrives.
    """
    # -------- build OpenAI-style message list (no system prompt) -------------
    # BUG FIX: the previous comprehension tuple-unpacked `(u, _)` out of a
    # flattened list of *strings* (`sum(([h[0], h[1]] for h in history), [])`),
    # which raises ValueError for any non-empty history whose messages are not
    # exactly two characters long.  Flatten the (user, bot) pairs in order and
    # alternate roles by position instead.
    turns = [text for pair in history for text in pair]
    messages = [
        {"role": "user" if idx % 2 == 0 else "assistant", "content": text}
        for idx, text in enumerate(turns)
        if text  # drop empty placeholders
    ]
    messages.append({"role": "user", "content": message})

    # NOTE(review): the payload/header fields were elided in the diff this was
    # reviewed from; reconstructed from the function signature — confirm the
    # exact field names (esp. how `beta` is passed) against the vLLM server.
    headers = {"Content-Type": "application/json"}
    payload = {
        "messages": messages,
        "max_tokens": int(max_tokens),
        "temperature": temperature,
        "top_p": top_p,
        "stream": True,
        "extra_body": {"beta": beta},  # MoI blending strength
    }

    try:
        # (10, None): bounded connect timeout, unbounded read — the stream
        # stays open for as long as the server keeps generating.
        with requests.post(API_URL,
                           json=payload,
                           stream=True,
                           headers=headers,
                           timeout=(10, None)) as resp:
            resp.raise_for_status()

            assistant = ""
            # Iterate over the server-sent-event lines of the HTTP stream.
            for raw in resp.iter_lines(decode_unicode=True, delimiter=b"\n"):
                if not raw:
                    continue
                # SSE data lines carry a "data: " prefix; tolerate bare lines.
                data = raw[6:] if raw.startswith("data: ") else raw
                if data.strip() == "[DONE]":
                    break
                delta = json.loads(data)["choices"][0]["delta"].get("content", "")
                assistant += delta
                yield history + [(message, assistant)]  # live update in Gradio
    except Exception as err:  # surface any failure inside the chat box
        yield history + [(message, f"[ERROR] {err}")]
55
 
56
+ # ---------------------------- UI --------------------------------------------
 
57
  with gr.Blocks(title="🎨 Mixture of Inputs (MoI) Demo") as demo:
58
  gr.Markdown(
59
  "## 🎨 Mixture of Inputs (MoI) Demo \n"
60
+ "Streaming vLLM demo with dynamic **beta** adjustment in MoI "
61
+ "(higher beta → less blending)."
62
  )
63
 
64
+ with gr.Row(): # sliders first
65
+ beta = gr.Slider(0.0, 10.0, value=1.0, step=0.1, label="MoI β")
66
+ temperature = gr.Slider(0.1, 1.0, value=0.6, step=0.1, label="Temperature")
67
+ top_p = gr.Slider(0.1, 1.0, value=0.80, step=0.05, label="Top-p")
68
+ max_tokens = gr.Slider(1, 2048, value=512, step=1, label="Max new tokens")
 
 
69
 
70
+ chatbot = gr.Chatbot(height=450)
71
+ user_box = gr.Textbox(placeholder="Type a message and press Enter…", show_label=False)
72
  clear_btn = gr.Button("Clear chat")
73
 
 
74
  user_box.submit(
75
+ fn=stream_completion,
76
  inputs=[user_box, chatbot, max_tokens, temperature, top_p, beta],
77
  outputs=chatbot,
78
  )