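"""Tests that app.initialize_configurations() wires up the LLM instances and the AsyncOpenAI client as expected."""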
import importlib
from collections import Counter
from unittest.mock import MagicMock, patch

import pytest
# The mocks are passed in bottom-up: the decorator closest to the function (AsyncOpenAI)
# supplies the first argument, and the outermost (load_dotenv) supplies the last.
@patch('dotenv.load_dotenv')           # Patch load_dotenv at its source module, where app.py looks it up
@patch('langchain_openai.ChatOpenAI')  # Patch ChatOpenAI at its source module
@patch('openai.AsyncOpenAI')           # Patch AsyncOpenAI at its source module
def test_llm_initialization(mock_async_openai_class, mock_chat_openai_class, mock_load_dotenv_func):
"""Test that required LLM instances and OpenAI client are initialized correctly.""" | |
# mock_load_dotenv_func is the mock for dotenv.load_dotenv | |
# mock_chat_openai_class is the mock for langchain_openai.ChatOpenAI | |
# mock_async_openai_class is the mock for openai.AsyncOpenAI | |
import app | |
importlib.reload(app) # Force reload app module AFTER mocks are applied | |
app.initialize_configurations() # Call the new initialization function | |
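    # For reference, initialize_configurations() in app.py is assumed to look roughly like
    # the following (a sketch of what this test exercises, not the actual implementation):
    #     load_dotenv()
    #     openai_client = AsyncOpenAI()
    #     llm_planner = ChatOpenAI(model='gpt-3.5-turbo', temperature=0.1)
    #     ...one ChatOpenAI(...) call per entry in expected_configs below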
    # Check that load_dotenv was called
    assert mock_load_dotenv_func.called

    # Check that the AsyncOpenAI client was initialized exactly once
    mock_async_openai_class.assert_called_once()
    expected_configs = {
        'llm_planner': {'model': 'gpt-3.5-turbo', 'temperature': 0.1},
        'llm_synthesizer': {'model': 'gpt-4o-mini', 'temperature': 0.4},
        'llm_direct': {'model': 'gpt-3.5-turbo', 'temperature': 0.3},
        'llm_analytical': {'model': 'gpt-3.5-turbo', 'temperature': 0.2},
        'llm_scientific': {'model': 'gpt-3.5-turbo', 'temperature': 0.3},
        'llm_philosophical': {'model': 'gpt-4o-mini', 'temperature': 0.5},
        'llm_factual': {'model': 'gpt-3.5-turbo', 'temperature': 0.1},
        'llm_metaphorical': {'model': 'gpt-4o-mini', 'temperature': 0.6},
        'llm_futuristic': {'model': 'gpt-4o-mini', 'temperature': 0.6},
        'llm_mermaid_generator': {'model': 'gpt-3.5-turbo', 'temperature': 0.1},
    }
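    # Several variables above share the same (model, temperature) pair (e.g. llm_planner,
    # llm_factual and llm_mermaid_generator), so the actual calls are compared as a multiset
    # via Counter rather than as a set of unique configurations.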
    call_args_list = mock_chat_openai_class.call_args_list

    # Expect exactly one ChatOpenAI call per config defined above
    expected_total_calls = len(expected_configs)
    assert len(call_args_list) == expected_total_calls, \
        f"Expected {expected_total_calls} LLM initializations, but found {len(call_args_list)}"

    actual_calls_configs = []
    for call in call_args_list:
        _, kwargs = call  # each entry in call_args_list unpacks to (args, kwargs)
        actual_calls_configs.append({'model': kwargs.get('model'), 'temperature': kwargs.get('temperature')})

    # Convert dicts to a hashable type (tuple of sorted items) so they can be counted
    actual_configs_tuples = [tuple(sorted(c.items())) for c in actual_calls_configs]
    actual_counts = Counter(actual_configs_tuples)

    # Count how many times each unique configuration is expected based on the definitions above.
    # This accounts for the same config being used for multiple LLM variables (e.g. planner and factual).
    expected_config_definitions_as_tuples = [tuple(sorted(c.items())) for c in expected_configs.values()]
    expected_definition_counts = Counter(expected_config_definitions_as_tuples)

    for config_tuple, num_definitions in expected_definition_counts.items():
        # Each definition is expected to produce exactly one ChatOpenAI call
        expected_actual_calls_for_this_config = num_definitions
        assert actual_counts[config_tuple] == expected_actual_calls_for_this_config, \
            f"Expected config {dict(config_tuple)} (defined {num_definitions} times) to be called " \
            f"{expected_actual_calls_for_this_config} times, but found {actual_counts[config_tuple]} times."

    assert len(actual_counts) == len(expected_definition_counts), \
        f"Found {len(actual_counts)} unique LLM configurations in actual calls, but expected " \
        f"{len(expected_definition_counts)} unique configurations to be defined."