Update main.py
main.py CHANGED

@@ -3,7 +3,7 @@ from dotenv import load_dotenv
 from fastapi import FastAPI, HTTPException, Request
 from fastapi.responses import StreamingResponse, HTMLResponse
 from pydantic import BaseModel
-import requests
+import httpx
 from functools import lru_cache

 # Load environment variables from .env file
@@ -11,9 +11,6 @@ load_dotenv()

 app = FastAPI()

-# Create a session for reusing the HTTP connection
-session = requests.Session()
-
 # Get API keys and secret endpoint from environment variables
 api_keys_str = os.getenv('API_KEYS')
 valid_api_keys = api_keys_str.split(',') if api_keys_str else []
@@ -55,17 +52,18 @@ async def root():

 # Cache function with lru_cache
 @lru_cache(maxsize=1)
-def get_cached_models():
-    ...
+async def get_cached_models():
+    async with httpx.AsyncClient() as client:
+        try:
+            response = await client.get(f"{secret_api_endpoint}/api/v1/models", timeout=3)
+            response.raise_for_status()
+            return response.json()
+        except httpx.RequestError as e:
+            raise HTTPException(status_code=500, detail=f"Request failed: {e}")

 @app.get("/models")
 async def get_models():
-    return get_cached_models()
+    return await get_cached_models()

 @app.post("/v1/chat/completions")
 async def get_completion(payload: Payload, request: Request):
@@ -78,16 +76,17 @@ async def get_completion(payload: Payload, request: Request):

     # Prepare the payload for streaming
     payload_dict = {**payload.dict(), "stream": True}

-    # Define ...
-    def stream_generator():
-        ...
+    # Define an asynchronous generator to stream the response
+    async def stream_generator():
+        async with httpx.AsyncClient() as client:
+            try:
+                async with client.stream("POST", secret_api_endpoint, json=payload_dict, timeout=10) as response:
+                    response.raise_for_status()
+                    async for chunk in response.aiter_bytes(chunk_size=512):  # Smaller chunks for faster response
+                        if chunk:
+                            yield chunk
+            except httpx.RequestError as e:
+                raise HTTPException(status_code=500, detail=f"Streaming failed: {e}")

     # Return the streaming response
     return StreamingResponse(stream_generator(), media_type="application/json")
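A note on the caching hunk: @lru_cache around an async def caches the coroutine object, not its result. The first request to /models awaits that coroutine successfully, but every cache hit afterwards returns the same, already-awaited coroutine, and awaiting it again raises RuntimeError. A minimal sketch of caching the decoded JSON instead, assuming secret_api_endpoint is defined as in the file above; the cache variable is illustrative, and the except clause is widened to httpx.HTTPError so the raise_for_status failure is caught as well:

import httpx
from fastapi import HTTPException

_models_cache = None  # holds the decoded /models payload after the first successful fetch

async def get_cached_models():
    global _models_cache
    if _models_cache is None:
        async with httpx.AsyncClient() as client:
            try:
                response = await client.get(f"{secret_api_endpoint}/api/v1/models", timeout=3)
                response.raise_for_status()
                _models_cache = response.json()  # cache the data, not the coroutine
            except httpx.HTTPError as e:  # covers transport errors and non-2xx statuses
                raise HTTPException(status_code=500, detail=f"Request failed: {e}")
    return _models_cache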
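Separately, the commit removes the shared requests.Session but opens a fresh httpx.AsyncClient on every call, so connection pooling is lost. A sketch of how a single pooled client could be kept for the app's lifetime via FastAPI's lifespan hook; the names here are illustrative, not part of the commit:

from contextlib import asynccontextmanager

import httpx
from fastapi import FastAPI, Request

@asynccontextmanager
async def lifespan(app: FastAPI):
    # One pooled client for the whole app, closed cleanly on shutdown.
    app.state.http = httpx.AsyncClient(timeout=10)
    yield
    await app.state.http.aclose()

app = FastAPI(lifespan=lifespan)

@app.get("/models")
async def get_models(request: Request):
    # Reuses pooled connections instead of opening a client per request.
    response = await request.app.state.http.get(f"{secret_api_endpoint}/api/v1/models")
    response.raise_for_status()
    return response.json()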
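One more caveat on the streaming hunk: by the time stream_generator raises, StreamingResponse has normally already sent the 200 status line, so the HTTPException cannot turn the response into a 500. A hedged sketch of an in-band alternative, reusing payload_dict and secret_api_endpoint from the handler above; the error-chunk format is illustrative:

import httpx

async def stream_generator():
    async with httpx.AsyncClient() as client:
        try:
            async with client.stream("POST", secret_api_endpoint, json=payload_dict, timeout=10) as response:
                response.raise_for_status()
                async for chunk in response.aiter_bytes(chunk_size=512):
                    if chunk:
                        yield chunk
        except httpx.HTTPError as e:
            # The status code is already on the wire; report the failure
            # in-band as a final JSON chunk and end the stream.
            yield f'{{"error": "upstream request failed: {e}"}}'.encode()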