Commit f873ce7 by yzhuang · 1 parent: a967284

update gradio

Files changed (1): app.py (+57 −38)
app.py CHANGED
@@ -1,56 +1,75 @@
 import gradio as gr
 import requests
+import sseclient
+import json
 import os
-import spaces
-
-from server import setup_mixinputs, launch_vllm_server
 
 API_URL = "http://localhost:8000/v1/chat/completions"
 
-def chat_with_moi(message, history, temperature, top_p, beta):
-    # Set the MIXINPUTS_BETA env var *per request*
-    os.environ["MIXINPUTS_BETA"] = str(beta)
-
-    # setup_mixinputs()
-    # launch_vllm_server(beta=beta)
+def respond(
+    message,
+    history: list[tuple[str, str]],
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+    beta,
+):
+    # Build the message history in OpenAI chat format
+    messages = [{"role": "system", "content": system_message}]
+    for user, assistant in history:
+        if user:
+            messages.append({"role": "user", "content": user})
+        if assistant:
+            messages.append({"role": "assistant", "content": assistant})
+    messages.append({"role": "user", "content": message})
 
+    # Prepare the request payload
     payload = {
-        "model": "Qwen/Qwen3-4B",  # match what your vLLM server expects
-        "messages": [{"role": "user", "content": message}],
+        "model": "Qwen/Qwen3-4B",  # update to your actual model if needed
+        "messages": messages,
         "temperature": temperature,
         "top_p": top_p,
-        "max_tokens": 512,
+        "max_tokens": max_tokens,
+        "stream": True,
     }
 
+    # Optional: send beta as a custom request header
+    headers = {
+        "Content-Type": "application/json",
+        "X-MIXINPUTS-BETA": str(beta),  # or modify your vLLM code to read this
+    }
+
+    # Stream the response using SSE (Server-Sent Events)
     try:
-        response = requests.post(API_URL, json=payload)
+        response = requests.post(API_URL, json=payload, stream=True, headers=headers)
         response.raise_for_status()
-        return response.json()["choices"][0]["message"]["content"]
-    except Exception as e:
-        return f"[ERROR] {str(e)}"
+        client = sseclient.SSEClient(response)  # requires the sseclient-py package
 
-# Gradio UI
-with gr.Blocks() as demo:
-    gr.Markdown("# 🧪 Mixture of Inputs (MoI) Demo with vLLM")
+        full_text = ""
+        for event in client.events():
+            if event.data == "[DONE]":
+                break
+            delta = json.loads(event.data)["choices"][0]["delta"].get("content", "")
+            full_text += delta
+            yield full_text
 
-    with gr.Row():
-        temperature = gr.Slider(0.0, 1.5, value=0.7, label="Temperature")
-        top_p = gr.Slider(0.0, 1.0, value=0.95, label="Top-p")
-        beta = gr.Slider(0.0, 10.0, value=1.0, label="MoI Beta")
-
-    chatbot = gr.Chatbot(type="messages")
-    message = gr.Textbox(label="Your message")
-    send_btn = gr.Button("Send")
-
-    history = gr.State([])
-
-    def respond(user_message, chat_history, temperature, top_p, beta):
-        reply = chat_with_moi(user_message, chat_history, temperature, top_p, beta)
-        chat_history = chat_history + [{"role": "user", "content": user_message},
-                                       {"role": "assistant", "content": reply}]
-        return chat_history, chat_history
+    except Exception as e:
+        yield f"[ERROR] {e}"
 
-    send_btn.click(respond, inputs=[message, history, temperature, top_p, beta],
-                   outputs=[chatbot, history])
+# UI layout using ChatInterface
+demo = gr.ChatInterface(
+    respond,
+    additional_inputs=[
+        gr.Textbox(value="You are a helpful assistant using Mixture of Inputs.", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+        gr.Slider(minimum=0.0, maximum=10.0, value=1.0, step=0.1, label="MoI Beta"),
+    ],
+    title="🧪 Mixture of Inputs (MoI) Demo",
+    description="Streaming local vLLM demo with dynamic MoI beta adjustment.",
+)
 
-demo.launch()
+if __name__ == "__main__":
+    demo.launch()
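
For reference, the streaming loop added above can also be written without the sseclient-py dependency, since requests can read the SSE lines directly. The following is a minimal sketch, not part of the commit; it assumes the endpoint at API_URL emits standard OpenAI-style "data: {...}" lines terminated by "data: [DONE]", which vLLM's OpenAI-compatible server does.

import json
import requests

API_URL = "http://localhost:8000/v1/chat/completions"

def stream_chat(payload: dict, headers: dict | None = None):
    """Yield the progressively growing assistant reply from an SSE stream."""
    response = requests.post(API_URL, json=payload, stream=True, headers=headers)
    response.raise_for_status()
    full_text = ""
    for line in response.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue  # skip the blank separator lines between events
        data = line[len("data: "):]
        if data == "[DONE]":
            break  # end-of-stream sentinel used by OpenAI-style servers
        chunk = json.loads(data)
        full_text += chunk["choices"][0]["delta"].get("content", "")
        yield full_text

Swapping this in for the sseclient block trades one dependency for hand-parsing the "data:" framing; either way the callback yields the same accumulated strings that gr.ChatInterface expects from a streaming generator.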
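
The X-MIXINPUTS-BETA header only has an effect if the server side actually reads it, which the commit leaves open ("or modify your vLLM code to read this"). As a hypothetical sketch of that missing half, assuming the MoI patch still reads the MIXINPUTS_BETA environment variable the way the pre-change app.py set it, an ASGI middleware on the server process could copy the header over; MoIBetaMiddleware and its wiring are illustrative names, not part of this commit or of vLLM.

import os
from starlette.middleware.base import BaseHTTPMiddleware

class MoIBetaMiddleware(BaseHTTPMiddleware):
    """Hypothetical: copy the X-MIXINPUTS-BETA header into the environment."""
    async def dispatch(self, request, call_next):
        beta = request.headers.get("x-mixinputs-beta")  # header lookup is case-insensitive
        if beta is not None:
            # os.environ is process-wide, so concurrent requests with different
            # beta values would race; acceptable for a single-user demo only.
            os.environ["MIXINPUTS_BETA"] = beta
        return await call_next(request)

# Wiring (illustrative): app.add_middleware(MoIBetaMiddleware) on the
# FastAPI app that serves /v1/chat/completions.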