Spaces:
Sleeping
Sleeping
server
Browse files
app.py
CHANGED
@@ -3,8 +3,9 @@ import json
|
|
3 |
import requests
|
4 |
import sseclient
|
5 |
import gradio as gr
|
|
|
6 |
|
7 |
-
API_URL = "http://localhost:8000/v1/completions"
|
8 |
|
9 |
|
10 |
def stream_completion(message, history, max_tokens, temperature, top_p, beta):
|
|
|
3 |
import requests
|
4 |
import sseclient
|
5 |
import gradio as gr
|
6 |
+
import server
|
7 |
|
8 |
+
API_URL = "http://localhost:8000/v1/chat/completions"
|
9 |
|
10 |
|
11 |
def stream_completion(message, history, max_tokens, temperature, top_p, beta):
|
server.py
CHANGED
@@ -33,4 +33,4 @@ setup_mixinputs()
|
|
33 |
threading.Thread(target=launch_vllm_server, daemon=True).start()
|
34 |
|
35 |
# Step 3: Give time for server to initialize
|
36 |
-
time.sleep(
|
|
|
33 |
threading.Thread(target=launch_vllm_server, daemon=True).start()
|
34 |
|
35 |
# Step 3: Give time for server to initialize
|
36 |
+
time.sleep(60)
|