Spaces:

yzhuang
/

MixtureOfInputs

Sleeping

App Files Files Community

yzhuang committed on May 22

Commit

39209e4

verified ·

1 Parent(s): 3d038b0

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -41

app.py CHANGED Viewed

@@ -1,25 +1,16 @@
 # app.py
-import json
-import requests, threading
-import sseclient
-import gradio as gr
-import server
 API_URL = "http://0.0.0.0:8000/v1/chat/completions"
 def stream_completion(message, history, max_tokens, temperature, top_p, beta):
-    """
-    Gradio callback: takes the newest user message + full chat history,
-    returns an updated history while streaming assistant tokens.
-    """
-    # ------- build OpenAI-style message list (no system prompt) -------------
-    messages = []
-    for usr, bot in history:
-        if usr:
-            messages.append({"role": "user", "content": usr})
-        if bot:
-            messages.append({"role": "assistant", "content": bot})
     messages.append({"role": "user", "content": message})
     payload = {
@@ -36,44 +27,52 @@ def stream_completion(message, history, max_tokens, temperature, top_p, beta):
     }
     try:
-        resp = requests.post(API_URL, json=payload, stream=True, headers=headers, timeout=60)
-        resp.raise_for_status()
-        client = sseclient.SSEClient(resp)
-        assistant = ""
-        for event in client.events():
-            if event.data.strip() == "[DONE]":
-                break
-            delta = json.loads(event.data)["choices"][0]["delta"].get("content", "")
-            assistant += delta
-            yield history + [(message, assistant)]  # update the chat box live
     except Exception as err:
         yield history + [(message, f"[ERROR] {err}")]
-# ----------------------- UI ---------------------------------------------
 with gr.Blocks(title="🎨 Mixture of Inputs (MoI) Demo") as demo:
     gr.Markdown(
         "## 🎨 Mixture of Inputs (MoI) Demo  \n"
-        "Streaming vLLM demo with dynamic **beta** adjustment in MoI, higher beta means less blending."
     )
-    # sliders first – all on one row
-    with gr.Row():
-        beta = gr.Slider(0.0, 10.0, value=1.0, step=0.1, label="MoI Beta")
-        temperature = gr.Slider(0.1, 1.0, value=0.6, step=0.1, label="Temperature")
-        top_p = gr.Slider(0.1, 1.0, value=0.80, step=0.05, label="Top-p")
-        max_tokens = gr.Slider(1, 2048, value=512, step=1, label="Max new tokens")
-    chatbot = gr.Chatbot(height=450)
-    user_box = gr.Textbox(placeholder="Type a message and press Enter…", show_label=False)
     clear_btn = gr.Button("Clear chat")
-    # wiring
     user_box.submit(
-        stream_completion,
         inputs=[user_box, chatbot, max_tokens, temperature, top_p, beta],
         outputs=chatbot,
     )

 # app.py
+import json, requests, gradio as gr
 API_URL = "http://0.0.0.0:8000/v1/chat/completions"
 def stream_completion(message, history, max_tokens, temperature, top_p, beta):
+    """Gradio callback: stream the assistant’s reply token-by-token."""
+    # -------- build OpenAI-style message list (no system prompt) -------------
+    messages = [{"role": "user", "content": u}         # past user turns
+                if i % 2 == 0 else                    # even idx → user
+                {"role": "assistant", "content": u}    # odd  idx → assistant
+                for i, (u, _) in enumerate(sum(([h[0], h[1]] for h in history), []))
+                if u]                                  # drop empty strings
     messages.append({"role": "user", "content": message})
     payload = {
     }
     try:
+        with requests.post(API_URL,
+                           json=payload,
+                           stream=True,
+                           headers=headers,
+                           timeout=(10, None)) as resp:
+            resp.raise_for_status()
+            assistant = ""
+            # iterate over the HTTP chunks
+            for raw in resp.iter_lines(decode_unicode=True, delimiter=b"\n"):
+                if not raw:
+                    continue
+                if raw.startswith("data: "):
+                    data = raw[6:]                 # strip the 'data: ' prefix
+                else:
+                    data = raw
+                if data.strip() == "[DONE]":
+                    break
+                delta = json.loads(data)["choices"][0]["delta"].get("content", "")
+                assistant += delta
+                yield history + [(message, assistant)]  # live update in Gradio
     except Exception as err:
         yield history + [(message, f"[ERROR] {err}")]
+# ---------------------------- UI --------------------------------------------
 with gr.Blocks(title="🎨 Mixture of Inputs (MoI) Demo") as demo:
     gr.Markdown(
         "## 🎨 Mixture of Inputs (MoI) Demo  \n"
+        "Streaming vLLM demo with dynamic **beta** adjustment in MoI "
+        "(higher beta → less blending)."
     )
+    with gr.Row():  # sliders first
+        beta         = gr.Slider(0.0, 10.0, value=1.0,  step=0.1,  label="MoI β")
+        temperature  = gr.Slider(0.1, 1.0,  value=0.6,  step=0.1,  label="Temperature")
+        top_p        = gr.Slider(0.1, 1.0,  value=0.80, step=0.05, label="Top-p")
+        max_tokens   = gr.Slider(1,   2048, value=512,  step=1,    label="Max new tokens")
+    chatbot   = gr.Chatbot(height=450)
+    user_box  = gr.Textbox(placeholder="Type a message and press Enter…", show_label=False)
     clear_btn = gr.Button("Clear chat")
     user_box.submit(
+        fn=stream_completion,
         inputs=[user_box, chatbot, max_tokens, temperature, top_p, beta],
         outputs=chatbot,
     )