yzhuang committed
Commit fb33d5d · 1 Parent(s): a634807
Files changed (2)
  1. app.py +1 -1
  2. server.py +2 -2
app.py CHANGED
@@ -15,7 +15,7 @@ def chat_with_moi(message, history, temperature, top_p, beta):
     # launch_vllm_server(beta=beta)
 
     payload = {
-        "model": "Qwen/QwQ-32B",  # match what your vLLM server expects
+        "model": "Qwen/Qwen3-4B",  # match what your vLLM server expects
         "messages": [{"role": "user", "content": message}],
         "temperature": temperature,
         "top_p": top_p,
server.py CHANGED
@@ -8,7 +8,7 @@ def setup_mixinputs():
     # Step 1: Run mixinputs setup
     subprocess.run(["mixinputs", "setup"], check=True)
 
-@spaces.GPU(duration=240)
+# @spaces.GPU(duration=240)
 def launch_vllm_server(beta=1.0):
     # Step 2: Set environment variables
     env = os.environ.copy()
@@ -18,7 +18,7 @@ def launch_vllm_server(beta=1.0):
     # Step 3: Launch vLLM with custom options
     cmd = [
         "vllm", "serve",
-        "Qwen/QwQ-32B",
+        "Qwen/Qwen3-4B",
         "--tensor-parallel-size", "1",
         "--enforce-eager",
         "--max-seq-len-to-capture", "2048",