ParthSadaria committed
Commit 4bcb2c2 · verified · 1 Parent(s): 4af9da3

Update main.py

Files changed (1)
  1. main.py +208 -377
main.py CHANGED
@@ -19,12 +19,13 @@ from concurrent.futures import ThreadPoolExecutor
  import uvloop
  from fastapi.middleware.gzip import GZipMiddleware
  from starlette.middleware.cors import CORSMiddleware

  # Enable uvloop for faster event loop
  asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

  # Thread pool for CPU-bound operations
- executor = ThreadPoolExecutor(max_workers=8)

  # Load environment variables once at startup
  load_dotenv()
@@ -59,7 +60,6 @@ def get_env_vars():
  'secret_api_endpoint_4': "https://text.pollinations.ai/openai",
  'mistral_api': "https://api.mistral.ai",
  'mistral_key': os.getenv('MISTRAL_KEY'),
- 'image_endpoint': os.getenv("IMAGE_ENDPOINT"),
  'endpoint_origin': os.getenv('ENDPOINT_ORIGIN')
  }

@@ -128,12 +128,12 @@ available_model_ids: List[str] = []
  def get_async_client():
  return httpx.AsyncClient(
  timeout=60.0,
- limits=httpx.Limits(max_keepalive_connections=20, max_connections=100)
  )

  # Create a cloudscraper pool
  scraper_pool = []
- MAX_SCRAPERS = 10

  def get_scraper():
  if not scraper_pool:
@@ -197,83 +197,86 @@ async def get_models():
  raise HTTPException(status_code=500, detail="Error loading available models")
  return models_data

- # Searcher function with optimized streaming - moved to a separate thread
  async def generate_search_async(query: str, systemprompt: Optional[str] = None, stream: bool = True):
- loop = asyncio.get_running_loop()

- def _generate_search():
- headers = {"User-Agent": ""}
-
- # Use the provided system prompt, or default to "Be Helpful and Friendly"
- system_message = systemprompt or "Be Helpful and Friendly"
-
- # Create the prompt history with the user query and system message
- prompt = [
- {"role": "user", "content": query},
- ]
-
- prompt.insert(0, {"content": system_message, "role": "system"})
-
- # Prepare the payload for the API request
- payload = {
- "is_vscode_extension": True,
- "message_history": prompt,
- "requested_model": "Claude 3.7 Sonnet",
- "user_input": prompt[-1]["content"],
- }
-
- # Get endpoint from environment
- secret_api_endpoint_3 = get_env_vars()['secret_api_endpoint_3']
- if not secret_api_endpoint_3:
- raise ValueError("Search API endpoint not configured")
-
- # Send the request to the chat endpoint using a scraper from the pool
- response = get_scraper().post(
- secret_api_endpoint_3,
- headers=headers,
- json=payload,
- stream=True
- )
-
- result = []
- streaming_text = ""
-
- # Process the streaming response
- for value in response.iter_lines(decode_unicode=True):
- if value.startswith("data: "):
- try:
- json_modified_value = json.loads(value[6:])
- content = json_modified_value.get("choices", [{}])[0].get("delta", {}).get("content", "")
-
- if content.strip(): # Only process non-empty content
- cleaned_response = {
- "created": json_modified_value.get("created"),
- "id": json_modified_value.get("id"),
- "model": "searchgpt",
- "object": "chat.completion",
- "choices": [
- {
- "message": {
- "content": content
  }
- }
- ]
- }
-
- if stream:
- result.append(f"data: {json.dumps(cleaned_response)}\n\n")
-
- streaming_text += content
- except json.JSONDecodeError:
- continue
-
- if not stream:
- result.append(streaming_text)

- return result

- # Run in thread pool to avoid blocking the event loop
- return await loop.run_in_executor(executor, _generate_search)

  # Cache for frequently accessed static files
  @lru_cache(maxsize=10)
@@ -314,7 +317,7 @@ async def playground():
  async def return_models():
  return await get_models()

- # Search routes
  @app.get("/searchgpt")
  async def search_gpt(q: str, stream: Optional[bool] = False, systemprompt: Optional[str] = None):
  if not q:
@@ -322,22 +325,44 @@ async def search_gpt(q: str, stream: Optional[bool] = False, systemprompt: Optional[str] = None):

  usage_tracker.record_request(endpoint="/searchgpt")

- result = await generate_search_async(q, systemprompt=systemprompt, stream=stream)

  if stream:
  async def stream_generator():
- for chunk in result:
- yield chunk

  return StreamingResponse(
  stream_generator(),
  media_type="text/event-stream"
  )
  else:
- # For non-streaming, return the collected text
- return JSONResponse(content={"response": result[0] if result else ""})

- # Chat completion endpoint
  @app.post("/chat/completions")
  @app.post("/api/v1/chat/completions")
  async def get_completion(payload: Payload, request: Request, authenticated: bool = Depends(verify_api_key)):
@@ -364,7 +389,10 @@ async def get_completion(payload: Payload, request: Request, authenticated: bool
  # Prepare payload
  payload_dict = payload.dict()
  payload_dict["model"] = model_to_use
-

  # Get environment variables
  env_vars = get_env_vars()

@@ -384,35 +412,13 @@ async def get_completion(payload: Payload, request: Request, authenticated: bool
  endpoint = env_vars['secret_api_endpoint']
  custom_headers = {}

- print(f"Using endpoint: {endpoint}")
-
- # Create a new scraper for each request to avoid potential blocking
- scraper = cloudscraper.create_scraper(browser={
- 'browser': 'chrome',
- 'platform': 'windows',
- 'mobile': False
- })

- # Set a timeout for the entire request handling
- TIMEOUT_SECONDS = 20
-
- async def stream_generator_with_timeout(payload_dict):
  try:
- # Create a thread-safe event for cancellation
- cancel_event = threading.Event()
-
- def request_with_timeout():
- try:
- # Send POST request with the correct headers and timeout
- response = scraper.post(
- f"{endpoint}/v1/chat/completions",
- json=payload_dict,
- headers=custom_headers,
- stream=True,
- timeout=TIMEOUT_SECONDS
- )
-
- # Handle response errors
  if response.status_code >= 400:
  error_messages = {
  422: "Unprocessable entity. Check your payload.",
@@ -421,68 +427,42 @@ async def get_completion(payload: Payload, request: Request, authenticated: bool
  404: "The requested resource was not found.",
  }
  detail = error_messages.get(response.status_code, f"Error code: {response.status_code}")
- return {"error": detail, "status_code": response.status_code}

- result = []
-
- # Process the streaming response with timeout checks
- for line in response.iter_lines():
- # Check for cancellation
- if cancel_event.is_set():
- break
-
  if line:
- decoded = line.decode('utf-8') + "\n"
- result.append(decoded)
-
- return {"lines": result}
-
- except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
- return {"error": "Request timed out or connection failed", "status_code": 504}
- except Exception as e:
- return {"error": str(e), "status_code": 500}
-
- # Execute request in a ThreadPoolExecutor with a timeout
- loop = asyncio.get_running_loop()
- with ThreadPoolExecutor() as pool:
- response_future = loop.run_in_executor(pool, request_with_timeout)
-
- try:
- # Wait for response with a timeout
- response_data = await asyncio.wait_for(response_future, timeout=TIMEOUT_SECONDS)
-
- # If there was an error, raise an HTTPException
- if "error" in response_data:
- raise HTTPException(
- status_code=response_data.get("status_code", 500),
- detail=response_data["error"]
- )
-
- # Stream the response lines
- for line in response_data.get("lines", []):
- yield line
-
- except asyncio.TimeoutError:
- # Cancel the ongoing request
- cancel_event.set()
- raise HTTPException(status_code=504, detail="Request timed out after 20 seconds")
-
  except Exception as e:
  if isinstance(e, HTTPException):
  raise e
- # Use a generic error message that doesn't expose internal details
- raise HTTPException(status_code=500, detail=f"An error occurred while processing your request: {str(e)}")

- # Return streaming response with proper timeout handling
- try:
  return StreamingResponse(
- stream_generator_with_timeout(payload_dict),
- media_type="application/json"
  )
- except Exception as e:
- if isinstance(e, HTTPException):
- raise e
- raise HTTPException(status_code=500, detail=f"Failed to initialize streaming response: {str(e)}")

  # Asynchronous logging function
  async def log_request(request, model):
  # Get minimal data for logging
@@ -490,160 +470,6 @@ async def log_request(request, model):
  ip_hash = hash(request.client.host) % 10000 # Hash the IP for privacy
  print(f"Time: {current_time}, IP Hash: {ip_hash}, Model: {model}")

- # Image generation endpoint - optimized to use connection pool
- @app.api_route("/images/generations", methods=["GET", "POST"])
- async def generate_image(
- prompt: Optional[str] = None,
- model: str = "flux",
- seed: Optional[int] = None,
- width: Optional[int] = None,
- height: Optional[int] = None,
- nologo: Optional[bool] = True,
- private: Optional[bool] = None,
- enhance: Optional[bool] = None,
- request: Request = None,
- authenticated: bool = Depends(verify_api_key)
- ):
- # Validate the image endpoint
- image_endpoint = get_env_vars()['image_endpoint']
- if not image_endpoint:
- raise HTTPException(status_code=500, detail="Image endpoint not configured in environment variables.")
-
- usage_tracker.record_request(endpoint="/images/generations")
-
- # Handle GET and POST prompts
- if request.method == "POST":
- try:
- body = await request.json()
- prompt = body.get("prompt", "").strip()
- if not prompt:
- raise HTTPException(status_code=400, detail="Prompt cannot be empty")
- except Exception:
- raise HTTPException(status_code=400, detail="Invalid JSON payload")
- elif request.method == "GET":
- if not prompt or not prompt.strip():
- raise HTTPException(status_code=400, detail="Prompt cannot be empty")
- prompt = prompt.strip()
-
- # Sanitize and encode the prompt
- encoded_prompt = httpx.QueryParams({'prompt': prompt}).get('prompt')
-
- # Construct the URL with the encoded prompt
- base_url = image_endpoint.rstrip('/')
- url = f"{base_url}/{encoded_prompt}"
-
- # Prepare query parameters with validation
- params = {}
- if model and isinstance(model, str):
- params['model'] = model
- if seed is not None and isinstance(seed, int):
- params['seed'] = seed
- if width is not None and isinstance(width, int) and 64 <= width <= 2048:
- params['width'] = width
- if height is not None and isinstance(height, int) and 64 <= height <= 2048:
- params['height'] = height
- if nologo is not None:
- params['nologo'] = str(nologo).lower()
- if private is not None:
- params['private'] = str(private).lower()
- if enhance is not None:
- params['enhance'] = str(enhance).lower()
-
- try:
- # Use the shared httpx client for connection pooling
- client = get_async_client()
- response = await client.get(url, params=params, follow_redirects=True)
-
- # Check for various error conditions
- if response.status_code != 200:
- error_messages = {
- 404: "Image generation service not found",
- 400: "Invalid parameters provided to image service",
- 429: "Too many requests to image service",
- }
- detail = error_messages.get(response.status_code, f"Image generation failed with status code {response.status_code}")
- raise HTTPException(status_code=response.status_code, detail=detail)
-
- # Verify content type
- content_type = response.headers.get('content-type', '')
- if not content_type.startswith('image/'):
- raise HTTPException(
- status_code=500,
- detail="Unexpected content type received from image service"
- )
-
- # Use larger chunks for streaming for better performance
- async def stream_with_larger_chunks():
- chunks = []
- size = 0
- async for chunk in response.aiter_bytes(chunk_size=16384): # Use 16KB chunks
- chunks.append(chunk)
- size += len(chunk)
-
- if size >= 65536: # Yield every 64KB
- yield b''.join(chunks)
- chunks = []
- size = 0
-
- if chunks:
- yield b''.join(chunks)
-
- return StreamingResponse(
- stream_with_larger_chunks(),
- media_type=content_type,
- headers={
- 'Cache-Control': 'no-cache, no-store, must-revalidate',
- 'Pragma': 'no-cache',
- 'Expires': '0'
- }
- )
-
- except httpx.TimeoutException:
- raise HTTPException(status_code=504, detail="Image generation request timed out")
- except httpx.RequestError:
- raise HTTPException(status_code=500, detail="Failed to contact image service")
- except Exception:
- raise HTTPException(status_code=500, detail="Unexpected error during image generation")
-
- # Meme endpoint with optimized networking
- @app.get("/meme")
- async def get_meme():
- try:
- # Use the shared client for connection pooling
- client = get_async_client()
- response = await client.get("https://meme-api.com/gimme")
- response_data = response.json()
-
- meme_url = response_data.get("url")
- if not meme_url:
- raise HTTPException(status_code=404, detail="No meme found")
-
- image_response = await client.get(meme_url, follow_redirects=True)
-
- # Use larger chunks for streaming
- async def stream_with_larger_chunks():
- chunks = []
- size = 0
- async for chunk in image_response.aiter_bytes(chunk_size=16384):
- chunks.append(chunk)
- size += len(chunk)
-
- if size >= 65536:
- yield b''.join(chunks)
- chunks = []
- size = 0
-
- if chunks:
- yield b''.join(chunks)
-
- return StreamingResponse(
- stream_with_larger_chunks(),
- media_type=image_response.headers.get("content-type", "image/png"),
- headers={'Cache-Control': 'max-age=3600'} # Add caching
- )
- except Exception:
- raise HTTPException(status_code=500, detail="Failed to retrieve meme")
-
  # Cache usage statistics
  @lru_cache(maxsize=10)
  def get_usage_summary(days=7):
@@ -858,6 +684,45 @@ async def usage_page():
  html_content = get_usage_page_html()
  return HTMLResponse(content=html_content)

  # Utility function for loading model IDs - optimized to run once at startup
  def load_model_ids(json_file_path):
  try:
@@ -877,8 +742,13 @@ async def startup_event():

  # Add all pollinations models to available_model_ids
  available_model_ids.extend(list(pollinations_models))
  available_model_ids = list(set(available_model_ids)) # Remove duplicates
- print(f"Added Pollinations models. Total available models: {len(available_model_ids)}")

  # Preload scrapers
  for _ in range(MAX_SCRAPERS):
@@ -900,8 +770,6 @@ async def startup_event():
  missing_vars.append('MISTRAL_API')
  if not env_vars['mistral_key'] and any(model in mistral_models for model in available_model_ids):
  missing_vars.append('MISTRAL_KEY')
- if not env_vars['image_endpoint']:
- missing_vars.append('IMAGE_ENDPOINT')

  if missing_vars:
  print(f"WARNING: The following environment variables are missing: {', '.join(missing_vars)}")
@@ -923,8 +791,7 @@ async def shutdown_event():

  print("Server shutdown complete!")

- # Server maintenance endpoint
-
  # Health check endpoint
  @app.get("/health")
  async def health_check():
@@ -937,61 +804,25 @@ async def health_check():
  missing_critical_vars.append('API_KEYS')
  if not env_vars['secret_api_endpoint']:
  missing_critical_vars.append('SECRET_API_ENDPOINT')

- # Check if models are loaded
- models_loaded = len(available_model_ids) > 0
-
- status = "healthy"
- if missing_critical_vars or not models_loaded:
- status = "degraded"
-
- return {
- "status": status,
- "timestamp": datetime.datetime.utcnow().isoformat(),
- "uptime": time.time() - usage_tracker.start_time,
- "models_loaded": models_loaded,
- "model_count": len(available_model_ids),
- "issues": {
- "missing_env_vars": missing_critical_vars,
- "models_available": models_loaded
- }
  }

- # Error handlers
- @app.exception_handler(HTTPException)
- async def http_exception_handler(request, exc):
- """Format HTTP exceptions in a consistent way"""
- return JSONResponse(
- status_code=exc.status_code,
- content={"error": exc.detail}
- )
-
- @app.exception_handler(Exception)
- async def general_exception_handler(request, exc):
- """Handle unexpected exceptions gracefully"""
- # Log the error for debugging
- print(f"Unexpected error: {str(exc)}")
-
- return JSONResponse(
- status_code=500,
- content={"error": "An unexpected error occurred. Please try again later."}
- )
-
- # Static files endpoint for serving CSS, JS, etc.
-
- # Documentation
-
- # Run the server when executed directly
  if __name__ == "__main__":
  import uvicorn
-
- port = int(os.getenv("PORT", 7860))
-
- print(f"Starting Lokiai AI server on port {port}")
- uvicorn.run(
- "main:app",
- host="0.0.0.0",
- port=port,
- reload=False,
- log_level="info"
- )
 
  import uvloop
  from fastapi.middleware.gzip import GZipMiddleware
  from starlette.middleware.cors import CORSMiddleware
+ import contextlib

  # Enable uvloop for faster event loop
  asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

  # Thread pool for CPU-bound operations
+ executor = ThreadPoolExecutor(max_workers=16) # Increased thread count for better parallelism

  # Load environment variables once at startup
  load_dotenv()
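A minimal sketch of how a module-level ThreadPoolExecutor like the one above is typically used from async code; process_blocking is a hypothetical stand-in for any blocking call, not a function from main.py:

    import asyncio
    from concurrent.futures import ThreadPoolExecutor

    executor = ThreadPoolExecutor(max_workers=16)

    def process_blocking(data: str) -> str:
        # Placeholder for CPU-bound or otherwise blocking work
        return data.upper()

    async def handler(data: str) -> str:
        # Offload the blocking call so the event loop keeps serving other requests
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(executor, process_blocking, data)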
 
  'secret_api_endpoint_4': "https://text.pollinations.ai/openai",
  'mistral_api': "https://api.mistral.ai",
  'mistral_key': os.getenv('MISTRAL_KEY'),
  'endpoint_origin': os.getenv('ENDPOINT_ORIGIN')
  }

  def get_async_client():
  return httpx.AsyncClient(
  timeout=60.0,
+ limits=httpx.Limits(max_keepalive_connections=50, max_connections=200) # Increased limits
  )

  # Create a cloudscraper pool
  scraper_pool = []
+ MAX_SCRAPERS = 20 # Increased pool size

  def get_scraper():
  if not scraper_pool:
 
  raise HTTPException(status_code=500, detail="Error loading available models")
  return models_data

+ # Enhanced async streaming - now with real-time SSE support
  async def generate_search_async(query: str, systemprompt: Optional[str] = None, stream: bool = True):
+ # Create a streaming response channel using asyncio.Queue
+ queue = asyncio.Queue()

+ async def _fetch_search_data():
+ try:
+ headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
+
+ # Use the provided system prompt, or default to "Be Helpful and Friendly"
+ system_message = systemprompt or "Be Helpful and Friendly"
+
+ # Create the prompt history
+ prompt = [
+ {"role": "user", "content": query},
+ ]
+
+ prompt.insert(0, {"content": system_message, "role": "system"})
+
+ # Prepare the payload for the API request
+ payload = {
+ "is_vscode_extension": True,
+ "message_history": prompt,
+ "requested_model": "Claude 3.7 Sonnet",
+ "user_input": prompt[-1]["content"],
+ }
+
+ # Get endpoint from environment
+ secret_api_endpoint_3 = get_env_vars()['secret_api_endpoint_3']
+ if not secret_api_endpoint_3:
+ await queue.put({"error": "Search API endpoint not configured"})
+ return
+
+ # Use AsyncClient for better performance
+ async with httpx.AsyncClient(timeout=30.0) as client:
+ async with client.stream("POST", secret_api_endpoint_3, json=payload, headers=headers) as response:
+ if response.status_code != 200:
+ await queue.put({"error": f"Search API returned status code {response.status_code}"})
+ return
+
+ # Process the streaming response in real-time
+ buffer = ""
+ async for line in response.aiter_lines():
+ if line.startswith("data: "):
+ try:
+ json_data = json.loads(line[6:])
+ content = json_data.get("choices", [{}])[0].get("delta", {}).get("content", "")
+
+ if content.strip():
+ cleaned_response = {
+ "created": json_data.get("created"),
+ "id": json_data.get("id"),
+ "model": "searchgpt",
+ "object": "chat.completion",
+ "choices": [
+ {
+ "message": {
+ "content": content
+ }
+ }
+ ]
  }
+
+ # Send to queue immediately for streaming
+ await queue.put({"data": f"data: {json.dumps(cleaned_response)}\n\n", "text": content})
+ except json.JSONDecodeError:
+ continue

+ # Signal completion
+ await queue.put(None)
+
+ except Exception as e:
+ await queue.put({"error": str(e)})
+ await queue.put(None)
+
+ # Start the fetch process
+ asyncio.create_task(_fetch_search_data())

+ # Return the queue for consumption
+ return queue

  # Cache for frequently accessed static files
  @lru_cache(maxsize=10)
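For context, a compact standalone sketch of the producer/consumer pattern the new generate_search_async relies on: a background task pushes items onto an asyncio.Queue and a consumer drains it until it sees the None sentinel. All names here are illustrative, not taken from main.py:

    import asyncio

    async def producer(queue: asyncio.Queue) -> None:
        for i in range(3):
            await queue.put(f"chunk {i}")  # push work as it becomes available
        await queue.put(None)  # sentinel: tells the consumer to stop

    async def consumer() -> None:
        queue: asyncio.Queue = asyncio.Queue()
        asyncio.create_task(producer(queue))  # start the producer in the background
        while True:
            item = await queue.get()
            if item is None:
                break
            print(item)

    asyncio.run(consumer())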
 
  async def return_models():
  return await get_models()

+ # Search routes with enhanced real-time streaming
  @app.get("/searchgpt")
  async def search_gpt(q: str, stream: Optional[bool] = False, systemprompt: Optional[str] = None):
  if not q:

  usage_tracker.record_request(endpoint="/searchgpt")

+ queue = await generate_search_async(q, systemprompt=systemprompt, stream=True)

  if stream:
  async def stream_generator():
+ collected_text = ""
+ while True:
+ item = await queue.get()
+ if item is None:
+ break
+
+ if "error" in item:
+ yield f"data: {json.dumps({'error': item['error']})}\n\n"
+ break
+
+ if "data" in item:
+ yield item["data"]
+ collected_text += item.get("text", "")

  return StreamingResponse(
  stream_generator(),
  media_type="text/event-stream"
  )
  else:
+ # For non-streaming, collect all text and return at once
+ collected_text = ""
+ while True:
+ item = await queue.get()
+ if item is None:
+ break
+
+ if "error" in item:
+ raise HTTPException(status_code=500, detail=item["error"])
+
+ collected_text += item.get("text", "")
+
+ return JSONResponse(content={"response": collected_text})
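A hedged usage sketch for this route. The base URL http://localhost:7860 is an assumption taken from the uvicorn port at the end of main.py; the route itself takes q, stream, and systemprompt query parameters as shown in the diff:

    import httpx

    # Illustrative client for GET /searchgpt with streaming enabled.
    with httpx.Client(timeout=60.0) as client:
        with client.stream(
            "GET",
            "http://localhost:7860/searchgpt",
            params={"q": "What is FastAPI?", "stream": "true"},
        ) as response:
            for line in response.iter_lines():
                if line.startswith("data: "):
                    print(line[6:])  # each line carries a JSON chunk shaped like cleaned_response above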

+ # Enhanced streaming with direct SSE pass-through for real-time responses
  @app.post("/chat/completions")
  @app.post("/api/v1/chat/completions")
  async def get_completion(payload: Payload, request: Request, authenticated: bool = Depends(verify_api_key)):

  # Prepare payload
  payload_dict = payload.dict()
  payload_dict["model"] = model_to_use
+
+ # Ensure stream is True for real-time streaming (can be overridden by client)
+ stream_enabled = payload_dict.get("stream", True)
+
  # Get environment variables
  env_vars = get_env_vars()

  endpoint = env_vars['secret_api_endpoint']
  custom_headers = {}

+ print(f"Using endpoint: {endpoint} for model: {model_to_use}")

+ # Improved real-time streaming handler
+ async def real_time_stream_generator():
  try:
+ async with httpx.AsyncClient(timeout=60.0) as client:
+ async with client.stream("POST", f"{endpoint}/v1/chat/completions", json=payload_dict, headers=custom_headers) as response:
  if response.status_code >= 400:
  error_messages = {
  422: "Unprocessable entity. Check your payload.",
  404: "The requested resource was not found.",
  }
  detail = error_messages.get(response.status_code, f"Error code: {response.status_code}")
+ raise HTTPException(status_code=response.status_code, detail=detail)

+ # Stream the response in real-time with minimal buffering
+ async for line in response.aiter_lines():
  if line:
+ # Yield immediately for faster streaming
+ yield line + "\n"
+ except httpx.TimeoutException:
+ raise HTTPException(status_code=504, detail="Request timed out")
+ except httpx.RequestError as e:
+ raise HTTPException(status_code=502, detail=f"Failed to connect to upstream API: {str(e)}")
  except Exception as e:
  if isinstance(e, HTTPException):
  raise e
+ raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")

+ # Return streaming response with proper headers
+ if stream_enabled:
  return StreamingResponse(
+ real_time_stream_generator(),
+ media_type="text/event-stream",
+ headers={
+ "Content-Type": "text/event-stream",
+ "Cache-Control": "no-cache",
+ "Connection": "keep-alive",
+ "X-Accel-Buffering": "no" # Disable proxy buffering for Nginx
+ }
  )
+ else:
+ # For non-streaming requests, collect the entire response
+ response_content = []
+ async for chunk in real_time_stream_generator():
+ response_content.append(chunk)
+
+ return JSONResponse(content=json.loads(''.join(response_content)))
+
  # Asynchronous logging function
  async def log_request(request, model):
  # Get minimal data for logging
  ip_hash = hash(request.client.host) % 10000 # Hash the IP for privacy
  print(f"Time: {current_time}, IP Hash: {ip_hash}, Model: {model}")

  # Cache usage statistics
  @lru_cache(maxsize=10)
  def get_usage_summary(days=7):
 
  html_content = get_usage_page_html()
  return HTMLResponse(content=html_content)

+ # Meme endpoint with optimized networking
+ @app.get("/meme")
+ async def get_meme():
+ try:
+ # Use the shared client for connection pooling
+ client = get_async_client()
+ response = await client.get("https://meme-api.com/gimme")
+ response_data = response.json()
+
+ meme_url = response_data.get("url")
+ if not meme_url:
+ raise HTTPException(status_code=404, detail="No meme found")
+
+ image_response = await client.get(meme_url, follow_redirects=True)
+
+ # Use larger chunks for streaming
+ async def stream_with_larger_chunks():
+ chunks = []
+ size = 0
+ async for chunk in image_response.aiter_bytes(chunk_size=16384):
+ chunks.append(chunk)
+ size += len(chunk)
+
+ if size >= 65536:
+ yield b''.join(chunks)
+ chunks = []
+ size = 0
+
+ if chunks:
+ yield b''.join(chunks)
+
+ return StreamingResponse(
+ stream_with_larger_chunks(),
+ media_type=image_response.headers.get("content-type", "image/png"),
+ headers={'Cache-Control': 'max-age=3600'} # Add caching
+ )
+ except Exception:
+ raise HTTPException(status_code=500, detail="Failed to retrieve meme")
+
  # Utility function for loading model IDs - optimized to run once at startup
  def load_model_ids(json_file_path):
  try:
 

  # Add all pollinations models to available_model_ids
  available_model_ids.extend(list(pollinations_models))
+ # Add alternate models to available_model_ids
+ available_model_ids.extend(list(alternate_models))
+ # Add mistral models to available_model_ids
+ available_model_ids.extend(list(mistral_models))
+
  available_model_ids = list(set(available_model_ids)) # Remove duplicates
+ print(f"Total available models: {len(available_model_ids)}")

  # Preload scrapers
  for _ in range(MAX_SCRAPERS):

  missing_vars.append('MISTRAL_API')
  if not env_vars['mistral_key'] and any(model in mistral_models for model in available_model_ids):
  missing_vars.append('MISTRAL_KEY')

  if missing_vars:
  print(f"WARNING: The following environment variables are missing: {', '.join(missing_vars)}")

  print("Server shutdown complete!")

+ # Health check endpoint
  # Health check endpoint
  @app.get("/health")
  async def health_check():
 
  missing_critical_vars.append('API_KEYS')
  if not env_vars['secret_api_endpoint']:
  missing_critical_vars.append('SECRET_API_ENDPOINT')
+ if not env_vars['secret_api_endpoint_2']:
+ missing_critical_vars.append('SECRET_API_ENDPOINT_2')
+ if not env_vars['secret_api_endpoint_3']:
+ missing_critical_vars.append('SECRET_API_ENDPOINT_3')
+ if not env_vars['secret_api_endpoint_4']:
+ missing_critical_vars.append('SECRET_API_ENDPOINT_4')
+ if not env_vars['mistral_api']:
+ missing_critical_vars.append('MISTRAL_API')
+ if not env_vars['mistral_key']:
+ missing_critical_vars.append('MISTRAL_KEY')

+ health_status = {
+ "status": "healthy" if not missing_critical_vars else "unhealthy",
+ "missing_env_vars": missing_critical_vars,
+ "server_status": server_status,
+ "message": "Everything's lit! 🚀" if not missing_critical_vars else "Uh oh, some env vars are missing. 😬"
  }
+ return JSONResponse(content=health_status)

  if __name__ == "__main__":
  import uvicorn
+ uvicorn.run(app, host="0.0.0.0", port=7860)
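A quick sketch for exercising the /health route once the server is up; the localhost base URL assumes the default port 7860 set in the uvicorn.run call above, and the printed keys match the health_status dict built in health_check:

    import httpx

    # Probe the health endpoint and inspect the JSON it returns.
    response = httpx.get("http://localhost:7860/health", timeout=10.0)
    info = response.json()
    print(info["status"], info["missing_env_vars"])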