import pytest
import httpx
from openai import OpenAI
import os
import time
import random

# Configure the OpenAI client to point to the local server.
# The API key is required by the library but not used by the local server.
client = OpenAI(
    base_url="http://localhost:8000",
    api_key=os.environ.get("OPENAI_API_KEY", "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")
)


@pytest.mark.asyncio
async def test_list_models_openai():
    """Test the /models endpoint to get a list of available models."""
    print("\nTesting /models endpoint...")
    try:
        start_time = time.time()
        models_response = client.models.list()
        end_time = time.time()
        print(f"Received response from /models in {end_time - start_time:.2f}s")

        assert models_response is not None
        assert len(models_response.data) > 0, "No models were returned from the /models endpoint."

        print(f"Found {len(models_response.data)} models:")
        for model in models_response.data:
            print(f"  - Model ID: {model.id}")
            assert isinstance(model.id, str)
            assert model.object == "model"
    except Exception as e:
        pytest.fail(f"/models endpoint test failed using openai lib: {e}")


@pytest.mark.asyncio
async def test_chat_completion_streaming_openai():
    """
    Test the /chat/completions endpoint for streaming requests.
    This test first fetches an available model from the /models endpoint.
    """
    # Step 1: Get the list of available models first to make the test robust.
    print("\nFetching available models before chat completion test...")
    try:
        models_response = client.models.list()
        assert len(models_response.data) > 0, "Cannot run chat completion test: no models available."
        # Pick a model at random from the list for the test.
        model_to_test = random.choice(models_response.data).id
        print(f"Will use model '{model_to_test}' for the test.")
    except Exception as e:
        pytest.fail(f"Failed to fetch models before running chat completion test: {e}")

    # Step 2: Use the fetched model to run the streaming chat completion test.
    messages = [{"role": "user", "content": "squirrel pet simulator in python with emojis"}]
    print(f"\nTesting streaming chat completion with model '{model_to_test}'...")

    accumulated_content = ""
    deltas_received = 0
    start_time = time.time()

    try:
        stream = client.chat.completions.create(
            model=model_to_test,
            messages=messages,
            stream=True
        )

        # Consume the stream, validating each chunk and accumulating content deltas.
        for chunk in stream:
            assert chunk.id is not None
            assert chunk.object == "chat.completion.chunk"
            assert len(chunk.choices) > 0

            choice = chunk.choices[0]
            delta = choice.delta

            if delta.role is not None:
                print(f"\nReceived role: {delta.role}")
                assert delta.role == "assistant"

            if delta.content is not None:
                delta_content = delta.content
                assert isinstance(delta_content, str)
                accumulated_content += delta_content
                deltas_received += 1
                print(delta_content, end="", flush=True)

            if choice.finish_reason is not None:
                print(f"\nReceived finish_reason: {choice.finish_reason}")
                assert choice.finish_reason == "stop"
                break

        end_time = time.time()
        print("\n--- Streaming complete ---")
        print(f"Total deltas received: {deltas_received}")
        print(f"Full response ({len(accumulated_content)} chars) received in {end_time - start_time:.2f}s:")
        print(accumulated_content)

        assert deltas_received > 0, "No content deltas were received."
        assert len(accumulated_content) > 0, "Accumulated content is empty."
        assert "squirrel" in accumulated_content.lower(), "Response does not seem to be about a squirrel."
    except Exception as e:
        pytest.fail(f"Streaming chat completion failed: {e}")