Spaces:

ParthSadaria
/

lokiai

Running

App Files Community

ParthSadaria committed on Feb 28

Commit

de98206

verified ·

1 Parent(s): 7a110a5

Update main.py

Browse files

Files changed (1) hide show

main.py +35 -31

main.py CHANGED Viewed

@@ -104,6 +104,9 @@ app.add_middleware(RateLimitMiddleware, requests_per_second=2)
 secret_api_endpoint = os.getenv('SECRET_API_ENDPOINT')
 secret_api_endpoint_2 = os.getenv('SECRET_API_ENDPOINT_2')
 secret_api_endpoint_3 = os.getenv('SECRET_API_ENDPOINT_3')  # New endpoint for searchgpt
 image_endpoint = os.getenv("IMAGE_ENDPOINT")
 ENDPOINT_ORIGIN = os.getenv('ENDPOINT_ORIGIN')
@@ -244,60 +247,62 @@ async def return_models():
     return await get_models()
 server_status = True
 @app.post("/chat/completions")
-@app.post("api/v1/chat/completions")
-async def get_completion(payload: Payload, request: Request,authenticated: bool = Depends(verify_api_key)):
     # Check server status
     model_to_use = payload.model if payload.model else "gpt-4o-mini"
     # Validate model availability
     if model_to_use not in available_model_ids:
         raise HTTPException(
             status_code=400,
             detail=f"Model '{model_to_use}' is not available. Check /models for the available model list."
         )
     usage_tracker.record_request(model=model_to_use, endpoint="/chat/completions")
     # Prepare payload
     payload_dict = payload.dict()
     payload_dict["model"] = model_to_use
-    # payload_dict["stream"] = payload_dict.get("stream", False)
     # Select the appropriate endpoint
-    endpoint = secret_api_endpoint_2 if model_to_use in alternate_models else secret_api_endpoint
     # Current time and IP logging
     current_time = (datetime.datetime.utcnow() + datetime.timedelta(hours=5, minutes=30)).strftime("%Y-%m-%d %I:%M:%S %p")
     aaip = request.client.host
-    print(f"Time: {current_time}, {aaip} , {model_to_use}, server status :- {server_status}")
-    print(payload_dict)
     if not server_status:
         return JSONResponse(
             status_code=503,
             content={"message": "Server is under maintenance. Please try again later."}
         )
-    scraper = cloudscraper.create_scraper()
     async def stream_generator(payload_dict):
-        # Prepare custom headers
-        custom_headers = {
-            'DNT': '1',
-            # 'Origin': ENDPOINT_ORIGIN,
-            'Priority': 'u=1, i',
-            # 'Referer': ENDPOINT_ORIGIN
-        }
         try:
-            # Send POST request using CloudScraper with custom headers
             response = scraper.post(
-                f"{endpoint}/v1/chat/completions",
-                json=payload_dict,
                 headers=custom_headers,
                 stream=True
             )
-            # Error handling remains the same as in previous version
             if response.status_code == 422:
                 raise HTTPException(status_code=422, detail="Unprocessable entity. Check your payload.")
             elif response.status_code == 400:
@@ -308,20 +313,19 @@ async def get_completion(payload: Payload, request: Request,authenticated: bool
                 raise HTTPException(status_code=404, detail="The requested resource was not found.")
             elif response.status_code >= 500:
                 raise HTTPException(status_code=500, detail="Server error. Try again later.")
             # Stream response lines to the client
             for line in response.iter_lines():
                 if line:
                     yield line.decode('utf-8') + "\n"
         except requests.exceptions.RequestException as req_err:
-            # Handle request-specific errors
             print(response.text)
             raise HTTPException(status_code=500, detail=f"Request failed: {req_err}")
         except Exception as e:
-            # Handle unexpected errors
             print(response.text)
             raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {e}")
     return StreamingResponse(stream_generator(payload_dict), media_type="application/json")
 # Remove the duplicated endpoint and combine the functionality
 @app.api_route("/images/generations", methods=["GET", "POST"])  # Support both GET and POST

 secret_api_endpoint = os.getenv('SECRET_API_ENDPOINT')
 secret_api_endpoint_2 = os.getenv('SECRET_API_ENDPOINT_2')
 secret_api_endpoint_3 = os.getenv('SECRET_API_ENDPOINT_3')  # New endpoint for searchgpt
+mistral_api = "https://api.mistral.ai/v1"
+mistral_key = os.getenv('MISTRAL_KEY')
+mistral_models = ['mistral-saba-latest','mistral-small-latest','pixtral-large-latest']
 image_endpoint = os.getenv("IMAGE_ENDPOINT")
 ENDPOINT_ORIGIN = os.getenv('ENDPOINT_ORIGIN')
     return await get_models()
 server_status = True
 @app.post("/chat/completions")
+@app.post("/api/v1/chat/completions")
+async def get_completion(payload: Payload, request: Request, authenticated: bool = Depends(verify_api_key)):
     # Check server status
     model_to_use = payload.model if payload.model else "gpt-4o-mini"
     # Validate model availability
     if model_to_use not in available_model_ids:
         raise HTTPException(
             status_code=400,
             detail=f"Model '{model_to_use}' is not available. Check /models for the available model list."
         )
     usage_tracker.record_request(model=model_to_use, endpoint="/chat/completions")
     # Prepare payload
     payload_dict = payload.dict()
     payload_dict["model"] = model_to_use
     # Select the appropriate endpoint
+    if model_to_use in mistral_models:
+        endpoint = mistral_api
+        custom_headers = {
+            "Authorization": f"Bearer {mistral_key}"
+        }
+    elif model_to_use in alternate_models:
+        endpoint = secret_api_endpoint_2
+        custom_headers = {}
+    else:
+        endpoint = secret_api_endpoint
+        custom_headers = {}
     # Current time and IP logging
     current_time = (datetime.datetime.utcnow() + datetime.timedelta(hours=5, minutes=30)).strftime("%Y-%m-%d %I:%M:%S %p")
     aaip = request.client.host
+    print(f"Time: {current_time}, {aaip}, {model_to_use}, server status: {server_status}")
+    print(payload_dict)
     if not server_status:
         return JSONResponse(
             status_code=503,
             content={"message": "Server is under maintenance. Please try again later."}
         )
+    scraper = cloudscraper.create_scraper()
     async def stream_generator(payload_dict):
         try:
+            # Send POST request with the correct headers
             response = scraper.post(
+                f"{endpoint}/v1/chat/completions",
+                json=payload_dict,
                 headers=custom_headers,
                 stream=True
             )
+            # Handle response errors
             if response.status_code == 422:
                 raise HTTPException(status_code=422, detail="Unprocessable entity. Check your payload.")
             elif response.status_code == 400:
                 raise HTTPException(status_code=404, detail="The requested resource was not found.")
             elif response.status_code >= 500:
                 raise HTTPException(status_code=500, detail="Server error. Try again later.")
             # Stream response lines to the client
             for line in response.iter_lines():
                 if line:
                     yield line.decode('utf-8') + "\n"
         except requests.exceptions.RequestException as req_err:
             print(response.text)
             raise HTTPException(status_code=500, detail=f"Request failed: {req_err}")
         except Exception as e:
             print(response.text)
             raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {e}")
     return StreamingResponse(stream_generator(payload_dict), media_type="application/json")
 # Remove the duplicated endpoint and combine the functionality
 @app.api_route("/images/generations", methods=["GET", "POST"])  # Support both GET and POST