File size: 1,844 Bytes
4c07c1e
a846510
 
 
4c07c1e
a846510
4c07c1e
a846510
4c07c1e
a846510
 
 
4c07c1e
952fd8e
 
4c07c1e
a846510
fb33d5d
a846510
 
 
 
 
4c07c1e
a846510
 
 
 
 
 
4c07c1e
a846510
 
 
4c07c1e
a846510
 
 
 
4c07c1e
a967284
a846510
 
4c07c1e
a846510
4c07c1e
a846510
 
a967284
 
a846510
4c07c1e
a846510
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import gradio as gr
import requests
import os
import spaces

from server import setup_mixinputs, launch_vllm_server

API_URL = "http://localhost:8000/v1/chat/completions"

def chat_with_moi(message, history, temperature, top_p, beta):
    """Send one chat turn to the local vLLM server and return the reply text.

    Args:
        message: The new user message.
        history: Prior conversation turns as a list of
            ``{"role": ..., "content": ...}`` dicts (OpenAI message format,
            as produced by the Gradio ``respond`` callback). May be empty/None.
        temperature: Sampling temperature forwarded to the server.
        top_p: Nucleus-sampling cutoff forwarded to the server.
        beta: Mixture-of-Inputs beta; exported via the MIXINPUTS_BETA env var.

    Returns:
        The assistant's reply text, or an ``"[ERROR] ..."`` string on failure.
    """
    # Set the MIXINPUTS_BETA env var *per request*
    os.environ["MIXINPUTS_BETA"] = str(beta)

    # setup_mixinputs()
    # launch_vllm_server(beta=beta)

    # Include prior turns so the model sees the whole conversation, not just
    # the latest user message. `history` entries are already in the
    # role/content shape the chat-completions endpoint expects.
    messages = list(history or []) + [{"role": "user", "content": message}]

    payload = {
        "model": "Qwen/Qwen3-4B",  # match what your vLLM server expects
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": 512,
    }

    try:
        # Bounded timeout so a hung/unreachable server can't block the
        # Gradio worker thread indefinitely.
        response = requests.post(API_URL, json=payload, timeout=120)
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        # Surface the failure in the chat window rather than crashing the UI.
        return f"[ERROR] {str(e)}"

# Gradio UI
# NOTE: component creation order inside gr.Blocks() determines the rendered
# layout, so the statement order below is significant.
with gr.Blocks() as demo:
    gr.Markdown("# 🧪 Mixture of Inputs (MoI) Demo with vLLM")

    # Sampling / MoI controls shared by every request.
    with gr.Row():
        temperature = gr.Slider(0.0, 1.5, value=0.7, label="Temperature")
        top_p = gr.Slider(0.0, 1.0, value=0.95, label="Top-p")
        beta = gr.Slider(0.0, 10.0, value=1.0, label="MoI Beta")

    # type="messages" means chat history is a list of
    # {"role": ..., "content": ...} dicts (OpenAI message format).
    chatbot = gr.Chatbot(type="messages")
    message = gr.Textbox(label="Your message")
    send_btn = gr.Button("Send")

    # Server-side conversation state, kept in the same message-dict format
    # as the chatbot component so one list can feed both outputs.
    history = gr.State([])

    def respond(user_message, chat_history, temperature, top_p, beta):
        # Fetch the assistant reply, then append both turns to the history.
        # Returns the updated list twice: once for the Chatbot display and
        # once for the State that is fed back in on the next click.
        reply = chat_with_moi(user_message, chat_history, temperature, top_p, beta)
        chat_history = chat_history + [{"role": "user", "content": user_message},
                                    {"role": "assistant", "content": reply}]
        return chat_history, chat_history

    send_btn.click(respond, inputs=[message, history, temperature, top_p, beta],
                   outputs=[chatbot, history])

demo.launch()