Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
@@ -104,6 +104,9 @@ app.add_middleware(RateLimitMiddleware, requests_per_second=2)
|
|
104 |
secret_api_endpoint = os.getenv('SECRET_API_ENDPOINT')
|
105 |
secret_api_endpoint_2 = os.getenv('SECRET_API_ENDPOINT_2')
|
106 |
secret_api_endpoint_3 = os.getenv('SECRET_API_ENDPOINT_3') # New endpoint for searchgpt
|
|
|
|
|
|
|
107 |
image_endpoint = os.getenv("IMAGE_ENDPOINT")
|
108 |
ENDPOINT_ORIGIN = os.getenv('ENDPOINT_ORIGIN')
|
109 |
|
@@ -244,60 +247,62 @@ async def return_models():
|
|
244 |
return await get_models()
|
245 |
server_status = True
|
246 |
@app.post("/chat/completions")
|
247 |
-
@app.post("api/v1/chat/completions")
|
248 |
-
async def get_completion(payload: Payload, request: Request,authenticated: bool = Depends(verify_api_key)):
|
249 |
# Check server status
|
250 |
-
|
251 |
-
|
252 |
model_to_use = payload.model if payload.model else "gpt-4o-mini"
|
253 |
-
|
254 |
# Validate model availability
|
255 |
if model_to_use not in available_model_ids:
|
256 |
raise HTTPException(
|
257 |
status_code=400,
|
258 |
detail=f"Model '{model_to_use}' is not available. Check /models for the available model list."
|
259 |
)
|
260 |
-
|
261 |
usage_tracker.record_request(model=model_to_use, endpoint="/chat/completions")
|
262 |
-
|
263 |
# Prepare payload
|
264 |
payload_dict = payload.dict()
|
265 |
payload_dict["model"] = model_to_use
|
266 |
-
|
267 |
# Select the appropriate endpoint
|
268 |
-
|
269 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
270 |
# Current time and IP logging
|
271 |
current_time = (datetime.datetime.utcnow() + datetime.timedelta(hours=5, minutes=30)).strftime("%Y-%m-%d %I:%M:%S %p")
|
272 |
aaip = request.client.host
|
273 |
-
print(f"Time: {current_time}, {aaip}
|
274 |
-
print(payload_dict)
|
|
|
275 |
if not server_status:
|
276 |
return JSONResponse(
|
277 |
status_code=503,
|
278 |
content={"message": "Server is under maintenance. Please try again later."}
|
279 |
)
|
280 |
-
|
|
|
|
|
281 |
async def stream_generator(payload_dict):
|
282 |
-
|
283 |
-
# Prepare custom headers
|
284 |
-
custom_headers = {
|
285 |
-
'DNT': '1',
|
286 |
-
# 'Origin': ENDPOINT_ORIGIN,
|
287 |
-
'Priority': 'u=1, i',
|
288 |
-
# 'Referer': ENDPOINT_ORIGIN
|
289 |
-
}
|
290 |
-
|
291 |
try:
|
292 |
-
# Send POST request
|
293 |
response = scraper.post(
|
294 |
-
f"{endpoint}/v1/chat/completions",
|
295 |
-
json=payload_dict,
|
296 |
headers=custom_headers,
|
297 |
stream=True
|
298 |
)
|
299 |
-
|
300 |
-
#
|
301 |
if response.status_code == 422:
|
302 |
raise HTTPException(status_code=422, detail="Unprocessable entity. Check your payload.")
|
303 |
elif response.status_code == 400:
|
@@ -308,20 +313,19 @@ async def get_completion(payload: Payload, request: Request,authenticated: bool
|
|
308 |
raise HTTPException(status_code=404, detail="The requested resource was not found.")
|
309 |
elif response.status_code >= 500:
|
310 |
raise HTTPException(status_code=500, detail="Server error. Try again later.")
|
311 |
-
|
312 |
# Stream response lines to the client
|
313 |
for line in response.iter_lines():
|
314 |
if line:
|
315 |
yield line.decode('utf-8') + "\n"
|
|
|
316 |
except requests.exceptions.RequestException as req_err:
|
317 |
-
# Handle request-specific errors
|
318 |
print(response.text)
|
319 |
raise HTTPException(status_code=500, detail=f"Request failed: {req_err}")
|
320 |
except Exception as e:
|
321 |
-
# Handle unexpected errors
|
322 |
print(response.text)
|
323 |
raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {e}")
|
324 |
-
|
325 |
return StreamingResponse(stream_generator(payload_dict), media_type="application/json")
|
326 |
# Remove the duplicated endpoint and combine the functionality
|
327 |
@app.api_route("/images/generations", methods=["GET", "POST"]) # Support both GET and POST
|
|
|
104 |
secret_api_endpoint = os.getenv('SECRET_API_ENDPOINT')
|
105 |
secret_api_endpoint_2 = os.getenv('SECRET_API_ENDPOINT_2')
|
106 |
secret_api_endpoint_3 = os.getenv('SECRET_API_ENDPOINT_3') # New endpoint for searchgpt
|
107 |
+
# Base URL only, without the "/v1" version prefix: get_completion appends
# "/v1/chat/completions" to whichever endpoint it selects, so including
# "/v1" here would produce ".../v1/v1/chat/completions".
mistral_api = "https://api.mistral.ai"
|
108 |
+
mistral_key = os.getenv('MISTRAL_KEY')
|
109 |
+
mistral_models = ['mistral-saba-latest','mistral-small-latest','pixtral-large-latest']
|
110 |
image_endpoint = os.getenv("IMAGE_ENDPOINT")
|
111 |
ENDPOINT_ORIGIN = os.getenv('ENDPOINT_ORIGIN')
|
112 |
|
|
|
247 |
return await get_models()
|
248 |
server_status = True
|
249 |
@app.post("/chat/completions")
|
250 |
+
@app.post("/api/v1/chat/completions")
|
251 |
+
async def get_completion(payload: Payload, request: Request, authenticated: bool = Depends(verify_api_key)):
|
252 |
# Check server status
|
|
|
|
|
253 |
model_to_use = payload.model if payload.model else "gpt-4o-mini"
|
254 |
+
|
255 |
# Validate model availability
|
256 |
if model_to_use not in available_model_ids:
|
257 |
raise HTTPException(
|
258 |
status_code=400,
|
259 |
detail=f"Model '{model_to_use}' is not available. Check /models for the available model list."
|
260 |
)
|
261 |
+
|
262 |
usage_tracker.record_request(model=model_to_use, endpoint="/chat/completions")
|
263 |
+
|
264 |
# Prepare payload
|
265 |
payload_dict = payload.dict()
|
266 |
payload_dict["model"] = model_to_use
|
267 |
+
|
268 |
# Select the appropriate endpoint
|
269 |
+
if model_to_use in mistral_models:
|
270 |
+
endpoint = mistral_api
|
271 |
+
custom_headers = {
|
272 |
+
"Authorization": f"Bearer {mistral_key}"
|
273 |
+
}
|
274 |
+
elif model_to_use in alternate_models:
|
275 |
+
endpoint = secret_api_endpoint_2
|
276 |
+
custom_headers = {}
|
277 |
+
else:
|
278 |
+
endpoint = secret_api_endpoint
|
279 |
+
custom_headers = {}
|
280 |
+
|
281 |
# Current time and IP logging
|
282 |
current_time = (datetime.datetime.utcnow() + datetime.timedelta(hours=5, minutes=30)).strftime("%Y-%m-%d %I:%M:%S %p")
|
283 |
aaip = request.client.host
|
284 |
+
print(f"Time: {current_time}, {aaip}, {model_to_use}, server status: {server_status}")
|
285 |
+
print(payload_dict)
|
286 |
+
|
287 |
if not server_status:
|
288 |
return JSONResponse(
|
289 |
status_code=503,
|
290 |
content={"message": "Server is under maintenance. Please try again later."}
|
291 |
)
|
292 |
+
|
293 |
+
scraper = cloudscraper.create_scraper()
|
294 |
+
|
295 |
async def stream_generator(payload_dict):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
296 |
try:
|
297 |
+
# Send POST request with the correct headers
|
298 |
response = scraper.post(
|
299 |
+
f"{endpoint}/v1/chat/completions",
|
300 |
+
json=payload_dict,
|
301 |
headers=custom_headers,
|
302 |
stream=True
|
303 |
)
|
304 |
+
|
305 |
+
# Handle response errors
|
306 |
if response.status_code == 422:
|
307 |
raise HTTPException(status_code=422, detail="Unprocessable entity. Check your payload.")
|
308 |
elif response.status_code == 400:
|
|
|
313 |
raise HTTPException(status_code=404, detail="The requested resource was not found.")
|
314 |
elif response.status_code >= 500:
|
315 |
raise HTTPException(status_code=500, detail="Server error. Try again later.")
|
316 |
+
|
317 |
# Stream response lines to the client
|
318 |
for line in response.iter_lines():
|
319 |
if line:
|
320 |
yield line.decode('utf-8') + "\n"
|
321 |
+
|
322 |
except requests.exceptions.RequestException as req_err:
|
|
|
323 |
print(response.text)
|
324 |
raise HTTPException(status_code=500, detail=f"Request failed: {req_err}")
|
325 |
except Exception as e:
|
|
|
326 |
print(response.text)
|
327 |
raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {e}")
|
328 |
+
|
329 |
return StreamingResponse(stream_generator(payload_dict), media_type="application/json")
|
330 |
# Remove the duplicated endpoint and combine the functionality
|
331 |
@app.api_route("/images/generations", methods=["GET", "POST"]) # Support both GET and POST
|