File size: 4,194 Bytes
314597a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import pytest
import httpx
from openai import OpenAI
import os
import time
import random

# Configure the OpenAI client to point to the local server under test.
# The `api_key` argument is required by the openai library's constructor but
# is presumably ignored by the local server — TODO confirm. The hard-coded
# "sk-xxx..." string is only a placeholder used when OPENAI_API_KEY is not
# set in the environment.
client = OpenAI(
    base_url="http://localhost:8000",
    api_key=os.environ.get("OPENAI_API_KEY", "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx") 
)

@pytest.mark.asyncio
async def test_list_models_openai():
    """Test the /models endpoint to get a list of available models.

    Verifies the server returns a non-empty model list and that every entry
    has a string ``id`` and ``object == "model"``.
    """
    print("\nTesting /models endpoint...")
    # NOTE: the original wrapped this whole body in
    # `except Exception: pytest.fail(...)`, which also caught AssertionError,
    # discarding pytest's assertion introspection and the real traceback.
    # Letting exceptions propagate gives strictly better failure reports.
    start_time = time.time()
    models_response = client.models.list()
    elapsed = time.time() - start_time

    print(f"Received response from /models in {elapsed:.2f}s")
    assert models_response is not None
    assert len(models_response.data) > 0, "No models were returned from the /models endpoint."

    print(f"Found {len(models_response.data)} models:")
    for model in models_response.data:
        print(f" - Model ID: {model.id}")
        assert isinstance(model.id, str)
        assert model.object == "model"

@pytest.mark.asyncio
async def test_chat_completion_streaming_openai():
    """
    Test the /chat/completions endpoint for streaming requests.

    First fetches the list of available models and picks one at random
    (so the test does not depend on a hard-coded model name), then streams
    a chat completion, validating each chunk's structure and the final
    accumulated content.
    """

    # Step 1: Get the list of available models first to make the test robust.
    print("\nFetching available models before chat completion test...")
    # As in the other tests, exceptions are allowed to propagate: wrapping in
    # `except Exception: pytest.fail(...)` would swallow assertion
    # introspection and the underlying traceback.
    models_response = client.models.list()
    assert len(models_response.data) > 0, "Cannot run chat completion test: no models available."
    # Pick a model at random. (The previous comment claimed "the first model"
    # while the code indexed with random.randint; random.choice states the
    # actual intent directly.)
    model_to_test = random.choice(models_response.data).id
    print(f"Will use model '{model_to_test}' for the test.")

    # Step 2: Use the fetched model to run the chat completion test.
    messages = [{"role": "user", "content": "squirrel pet simulator in python with emojis"}]

    print(f"\nTesting streaming chat completion with model '{model_to_test}'...")
    accumulated_content = ""
    deltas_received = 0
    start_time = time.time()

    stream = client.chat.completions.create(
        model=model_to_test,
        messages=messages,
        stream=True
    )

    for chunk in stream:
        # Every chunk must carry an id, the chunk object tag, and a choice.
        assert chunk.id is not None
        assert chunk.object == "chat.completion.chunk"
        assert len(chunk.choices) > 0

        choice = chunk.choices[0]
        delta = choice.delta

        # The role, when present, is always the assistant's.
        if delta.role is not None:
            print(f"\nReceived role: {delta.role}")
            assert delta.role == "assistant"

        # Accumulate content fragments as they stream in.
        if delta.content is not None:
            delta_content = delta.content
            assert isinstance(delta_content, str)

            accumulated_content += delta_content
            deltas_received += 1
            print(delta_content, end="", flush=True)

        # A finish_reason marks the end of the stream.
        if choice.finish_reason is not None:
            print(f"\nReceived finish_reason: {choice.finish_reason}")
            assert choice.finish_reason == "stop"
            break

    end_time = time.time()
    print(f"\n--- Streaming complete ---")
    print(f"Total deltas received: {deltas_received}")
    print(f"Full response ({len(accumulated_content)} chars) received in {end_time - start_time:.2f}s:")
    print(accumulated_content)

    assert deltas_received > 0, "No content deltas were received."
    assert len(accumulated_content) > 0, "Accumulated content is empty."
    assert "squirrel" in accumulated_content.lower(), "Story does not seem to be about a squirrel."