Spaces:
Sleeping
Sleeping
File size: 1,844 Bytes
4c07c1e a846510 4c07c1e a846510 4c07c1e a846510 4c07c1e a846510 4c07c1e 952fd8e 4c07c1e a846510 fb33d5d a846510 4c07c1e a846510 4c07c1e a846510 4c07c1e a846510 4c07c1e a967284 a846510 4c07c1e a846510 4c07c1e a846510 a967284 a846510 4c07c1e a846510 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import gradio as gr
import requests
import os
import spaces
from server import setup_mixinputs, launch_vllm_server
API_URL = "http://localhost:8000/v1/chat/completions"


def chat_with_moi(message, history, temperature, top_p, beta, timeout=120):
    """Send one chat turn to the local vLLM server and return the reply text.

    Args:
        message: The latest user message.
        history: Earlier turns as a list of ``{"role": ..., "content": ...}``
            dicts (oldest first). May be empty or ``None``.
        temperature: Sampling temperature forwarded in the request payload.
        top_p: Nucleus-sampling value forwarded in the request payload.
        beta: Value written to the ``MIXINPUTS_BETA`` environment variable
            before the request is sent.
        timeout: Seconds to wait for the server before giving up, so a
            stalled server cannot block the caller forever.

    Returns:
        The assistant's reply text, or a string starting with ``[ERROR]``
        when the request fails or the response cannot be parsed.
    """
    # Set the MIXINPUTS_BETA env var *per request*.
    # NOTE(review): os.environ is process-wide, so concurrent requests
    # overwrite each other's value, and a server that is already running in
    # another process may not observe the change -- confirm how the server
    # reads MIXINPUTS_BETA. Calling setup_mixinputs() /
    # launch_vllm_server(beta=beta) here is currently disabled.
    os.environ["MIXINPUTS_BETA"] = str(beta)

    # Replay earlier turns so the model keeps the conversation context.
    # Only role/content are forwarded; malformed entries are skipped.
    messages = [
        {"role": turn["role"], "content": turn["content"]}
        for turn in (history or [])
        if isinstance(turn, dict) and "role" in turn and "content" in turn
    ]
    messages.append({"role": "user", "content": message})

    payload = {
        "model": "Qwen/Qwen3-4B",  # match what your vLLM server expects
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": 512,
    }

    try:
        response = requests.post(API_URL, json=payload, timeout=timeout)
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except requests.RequestException as e:
        # Connection failures, timeouts, HTTP error statuses, invalid JSON.
        return f"[ERROR] {e}"
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # The server answered, but not with the expected response shape.
        return f"[ERROR] Unexpected response from server: {e!r}"
# Gradio UI: sampling controls, a chat transcript, and a message box.
with gr.Blocks() as demo:
    gr.Markdown("# 🧪 Mixture of Inputs (MoI) Demo with vLLM")

    with gr.Row():
        temperature = gr.Slider(0.0, 1.5, value=0.7, label="Temperature")
        top_p = gr.Slider(0.0, 1.0, value=0.95, label="Top-p")
        beta = gr.Slider(0.0, 10.0, value=1.0, label="MoI Beta")

    chatbot = gr.Chatbot(type="messages")
    message = gr.Textbox(label="Your message")
    send_btn = gr.Button("Send")
    # Per-session transcript: a list of {"role", "content"} dicts.
    history = gr.State([])

    def respond(user_message, chat_history, temperature_value, top_p_value,
                beta_value):
        """Handle one send action.

        Returns the updated transcript twice (once for the chatbot display,
        once for the session state) followed by the new textbox content.
        """
        # Ignore blank input instead of sending an empty prompt to the server.
        if not user_message or not user_message.strip():
            return chat_history, chat_history, ""

        reply = chat_with_moi(
            user_message, chat_history, temperature_value, top_p_value,
            beta_value,
        )
        # Build a new list rather than mutating the state value in place.
        chat_history = chat_history + [
            {"role": "user", "content": user_message},
            {"role": "assistant", "content": reply},
        ]
        # The empty string clears the textbox for the next message.
        return chat_history, chat_history, ""

    handler_inputs = [message, history, temperature, top_p, beta]
    handler_outputs = [chatbot, history, message]

    # The button and the Enter key trigger the same handler.
    send_btn.click(respond, inputs=handler_inputs, outputs=handler_outputs)
    message.submit(respond, inputs=handler_inputs, outputs=handler_outputs)

demo.launch()
|