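"""Tests that app.initialize_configurations() wires up the LLM instances and the AsyncOpenAI client as expected."""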
import importlib
from collections import Counter
from unittest.mock import MagicMock, patch

import pytest
# The mocks are passed in bottom-up: the decorator closest to the function (AsyncOpenAI)
# supplies the first argument, and the outermost (load_dotenv) supplies the last.
@patch('dotenv.load_dotenv')           # Patch load_dotenv at its source module, where app.py looks it up
@patch('langchain_openai.ChatOpenAI')  # Patch ChatOpenAI at its source module
@patch('openai.AsyncOpenAI')           # Patch AsyncOpenAI at its source module
def test_llm_initialization(mock_async_openai_class, mock_chat_openai_class, mock_load_dotenv_func):
"""Test that required LLM instances and OpenAI client are initialized correctly.""" | |
# mock_load_dotenv_func is the mock for dotenv.load_dotenv | |
# mock_chat_openai_class is the mock for langchain_openai.ChatOpenAI | |
# mock_async_openai_class is the mock for openai.AsyncOpenAI | |
import app | |
importlib.reload(app) # Force reload app module AFTER mocks are applied | |
app.initialize_configurations() # Call the new initialization function | |
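    # For reference, initialize_configurations() in app.py is assumed to look roughly like
    # the following (a sketch of what this test exercises, not the actual implementation):
    #     load_dotenv()
    #     openai_client = AsyncOpenAI()
    #     llm_planner = ChatOpenAI(model='gpt-3.5-turbo', temperature=0.1)
    #     ...one ChatOpenAI(...) call per entry in expected_configs below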
    # Check that load_dotenv was called
    assert mock_load_dotenv_func.called

    # Check that the AsyncOpenAI client was initialized exactly once
    mock_async_openai_class.assert_called_once()
    expected_configs = {
        'llm_planner': {'model': 'gpt-3.5-turbo', 'temperature': 0.1},
        'llm_synthesizer': {'model': 'gpt-4o-mini', 'temperature': 0.4},
        'llm_direct': {'model': 'gpt-3.5-turbo', 'temperature': 0.3},
        'llm_analytical': {'model': 'gpt-3.5-turbo', 'temperature': 0.2},
        'llm_scientific': {'model': 'gpt-3.5-turbo', 'temperature': 0.3},
        'llm_philosophical': {'model': 'gpt-4o-mini', 'temperature': 0.5},
        'llm_factual': {'model': 'gpt-3.5-turbo', 'temperature': 0.1},
        'llm_metaphorical': {'model': 'gpt-4o-mini', 'temperature': 0.6},
        'llm_futuristic': {'model': 'gpt-4o-mini', 'temperature': 0.6},
        'llm_mermaid_generator': {'model': 'gpt-3.5-turbo', 'temperature': 0.1},
    }
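    # Several variables above share the same (model, temperature) pair (e.g. llm_planner,
    # llm_factual and llm_mermaid_generator), so the actual calls are compared as a multiset
    # via Counter rather than as a set of unique configurations.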
    call_args_list = mock_chat_openai_class.call_args_list

    # Expect exactly one ChatOpenAI call per config defined above
    expected_total_calls = len(expected_configs)
    assert len(call_args_list) == expected_total_calls, \
        f"Expected {expected_total_calls} LLM initializations, but found {len(call_args_list)}"

    actual_calls_configs = []
    for call in call_args_list:
        _, kwargs = call  # each entry in call_args_list unpacks to (args, kwargs)
        actual_calls_configs.append({'model': kwargs.get('model'), 'temperature': kwargs.get('temperature')})

    # Convert dicts to a hashable type (tuple of sorted items) so they can be counted
    actual_configs_tuples = [tuple(sorted(c.items())) for c in actual_calls_configs]
    actual_counts = Counter(actual_configs_tuples)

    # Count how many times each unique configuration is expected based on the definitions above.
    # This accounts for the same config being used for multiple LLM variables (e.g. planner and factual).
    expected_config_definitions_as_tuples = [tuple(sorted(c.items())) for c in expected_configs.values()]
    expected_definition_counts = Counter(expected_config_definitions_as_tuples)

    for config_tuple, num_definitions in expected_definition_counts.items():
        # Each definition is expected to produce exactly one ChatOpenAI call
        expected_actual_calls_for_this_config = num_definitions
        assert actual_counts[config_tuple] == expected_actual_calls_for_this_config, \
            f"Expected config {dict(config_tuple)} (defined {num_definitions} times) to be called " \
            f"{expected_actual_calls_for_this_config} times, but found {actual_counts[config_tuple]} times."

    assert len(actual_counts) == len(expected_definition_counts), \
        f"Found {len(actual_counts)} unique LLM configurations in actual calls, but expected " \
        f"{len(expected_definition_counts)} unique configurations to be defined."