app.py CHANGED

@@ -15,7 +15,7 @@ def chat_with_moi(message, history, temperature, top_p, beta):
     # launch_vllm_server(beta=beta)
 
     payload = {
-        "model": "Qwen/
+        "model": "Qwen/Qwen3-4B",  # match what your vLLM server expects
         "messages": [{"role": "user", "content": message}],
         "temperature": temperature,
         "top_p": top_p,
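For context, a minimal sketch of how this payload is typically sent from the Gradio handler (assumptions: the vLLM server runs locally on port 8000 and exposes the OpenAI-compatible /v1/chat/completions route; the endpoint URL and response handling are not part of this diff):

import requests

def chat_with_moi(message, history, temperature, top_p, beta):
    # launch_vllm_server(beta=beta)  # disabled in this commit
    payload = {
        "model": "Qwen/Qwen3-4B",  # must match the model name vLLM was launched with
        "messages": [{"role": "user", "content": message}],
        "temperature": temperature,
        "top_p": top_p,
    }
    resp = requests.post("http://localhost:8000/v1/chat/completions", json=payload)
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]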
server.py CHANGED

@@ -8,7 +8,7 @@ def setup_mixinputs():
     # Step 1: Run mixinputs setup
     subprocess.run(["mixinputs", "setup"], check=True)
 
-@spaces.GPU(duration=240)
+# @spaces.GPU(duration=240)
 def launch_vllm_server(beta=1.0):
     # Step 2: Set environment variables
     env = os.environ.copy()

@@ -18,7 +18,7 @@ def launch_vllm_server(beta=1.0):
     # Step 3: Launch vLLM with custom options
     cmd = [
         "vllm", "serve",
-        "Qwen/
+        "Qwen/Qwen3-4B",
         "--tensor-parallel-size", "1",
         "--enforce-eager",
         "--max-seq-len-to-capture", "2048",
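Commenting out @spaces.GPU(duration=240) presumably detaches the long-lived vLLM process from ZeroGPU's per-call allocation, which only holds a GPU for the duration of the decorated call. Here is a sketch of the resulting launcher under stated assumptions: the MixInputs beta is assumed to reach the server through an environment variable, named MIXINPUTS_BETA purely for illustration, since the real variable name is not visible in this diff.

import os
import subprocess

# @spaces.GPU(duration=240)  # removed in this commit; see note above
def launch_vllm_server(beta=1.0):
    # Step 2: Set environment variables for the MixInputs-patched server
    env = os.environ.copy()
    env["MIXINPUTS_BETA"] = str(beta)  # hypothetical variable name

    # Step 3: Launch vLLM with custom options as a background process,
    # so the Gradio app keeps serving while the model loads
    cmd = [
        "vllm", "serve",
        "Qwen/Qwen3-4B",
        "--tensor-parallel-size", "1",
        "--enforce-eager",
        "--max-seq-len-to-capture", "2048",
    ]
    return subprocess.Popen(cmd, env=env)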