app.py CHANGED

@@ -15,7 +15,7 @@ def chat_with_moi(message, history, temperature, top_p, beta):
     # launch_vllm_server(beta=beta)
 
     payload = {
-        "model": "Qwen/
+        "model": "Qwen/Qwen3-4B",  # match what your vLLM server expects
         "messages": [{"role": "user", "content": message}],
         "temperature": temperature,
         "top_p": top_p,
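For context, a minimal sketch of how this payload is typically sent from the Gradio handler (assumptions: the vLLM server runs locally on port 8000 and exposes the OpenAI-compatible /v1/chat/completions route; the endpoint URL and response handling are not part of this diff):

import requests

def chat_with_moi(message, history, temperature, top_p, beta):
    # launch_vllm_server(beta=beta)  # disabled in this commit
    payload = {
        "model": "Qwen/Qwen3-4B",  # must match the model name vLLM was launched with
        "messages": [{"role": "user", "content": message}],
        "temperature": temperature,
        "top_p": top_p,
    }
    resp = requests.post("http://localhost:8000/v1/chat/completions", json=payload)
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]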
server.py CHANGED

@@ -8,7 +8,7 @@ def setup_mixinputs():
     # Step 1: Run mixinputs setup
     subprocess.run(["mixinputs", "setup"], check=True)
 
-@spaces.GPU(duration=240)
+# @spaces.GPU(duration=240)
 def launch_vllm_server(beta=1.0):
     # Step 2: Set environment variables
     env = os.environ.copy()

@@ -18,7 +18,7 @@ def launch_vllm_server(beta=1.0):
     # Step 3: Launch vLLM with custom options
     cmd = [
         "vllm", "serve",
-        "Qwen/
+        "Qwen/Qwen3-4B",
         "--tensor-parallel-size", "1",
         "--enforce-eager",
         "--max-seq-len-to-capture", "2048",
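Commenting out @spaces.GPU(duration=240) presumably detaches the long-lived vLLM process from ZeroGPU's per-call allocation, which only holds a GPU for the duration of the decorated call. Here is a sketch of the resulting launcher under stated assumptions: the MixInputs beta is assumed to reach the server through an environment variable, named MIXINPUTS_BETA purely for illustration, since the real variable name is not visible in this diff.

import os
import subprocess

# @spaces.GPU(duration=240)  # removed in this commit; see note above
def launch_vllm_server(beta=1.0):
    # Step 2: Set environment variables for the MixInputs-patched server
    env = os.environ.copy()
    env["MIXINPUTS_BETA"] = str(beta)  # hypothetical variable name

    # Step 3: Launch vLLM with custom options as a background process,
    # so the Gradio app keeps serving while the model loads
    cmd = [
        "vllm", "serve",
        "Qwen/Qwen3-4B",
        "--tensor-parallel-size", "1",
        "--enforce-eager",
        "--max-seq-len-to-capture", "2048",
    ]
    return subprocess.Popen(cmd, env=env)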