Final_Assignment_Template

Running

Final_Assignment_Template / test_agent.py

Refactor app.py and update import paths in test_agent.py to improve code organization. Introduce new files for agent configuration, graph definition, and tools, enhancing the overall structure and functionality of the agent system.

43a2e87 unverified about 2 months ago

raw

history blame

10.6 kB

	import logging

	import pytest

	from api.runner import AgentRunner

	# Logger dedicated to this test module; its level is set to INFO so the
	# progress messages logged by the fixtures and tests below pass this logger.
	test_logger = logging.getLogger("test_agent")
	test_logger.setLevel(logging.INFO)

	# Module-level pytest mark: ignore DeprecationWarning attributed to httpx._models.
	pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning:httpx._models")

	# Base URL of the remote API and the questions endpoint derived from it.
	# NOTE(review): in the visible code QUESTIONS_URL is referenced only by the
	# commented-out questions_data fixture — confirm it is still needed.
	DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
	QUESTIONS_URL = f"{DEFAULT_API_URL}/questions"


	@pytest.fixture(scope="session")
	def agent():
	    """Session-scoped fixture that logs its creation and yields a new AgentRunner.

	    Returns:
	        The AgentRunner instance built by calling ``AgentRunner()`` with no
	        arguments.
	    """
	    test_logger.info("Creating AgentRunner instance")
	    runner = AgentRunner()
	    return runner


	# @pytest.fixture(scope="session")
	# def questions_data():
	# """Fixture to fetch questions from the API."""
	# test_logger.info(f"Fetching questions from: {QUESTIONS_URL}")
	# try:
	# response = requests.get(QUESTIONS_URL, timeout=15)
	# response.raise_for_status()
	# data = response.json()
	# if not data:
	# test_logger.error("Fetched questions list is empty.")
	# return []
	# test_logger.info(f"Fetched {len(data)} questions.")
	# return data
	# except requests.exceptions.RequestException as e:
	# test_logger.error(f"Error fetching questions: {e}")
	# return []
	# except requests.exceptions.JSONDecodeError as e:
	# test_logger.error(f"Error decoding JSON response from questions endpoint: {e}")
	# return []
	# except Exception as e:
	# test_logger.error(f"An unexpected error occurred fetching questions: {e}")
	# return []
	#
	# class TestAppQuestions:
	# """Test cases for questions from the app."""
	#
	# def test_first_app_question(self, agent, questions_data):
	# """Test the agent's response to the first app question."""
	# if not questions_data:
	# pytest.skip("No questions available from API")
	#
	# first_question = questions_data[0]
	# question_text = first_question.get("question")
	# task_id = first_question.get("task_id")
	#
	# if not question_text or not task_id:
	# pytest.skip("First question is missing required fields")
	#
	# test_logger.info(f"Testing with app question: {question_text}")
	#
	# response = agent(question_text)
	# test_logger.info(f"Agent response: {response}")
	#
	# # Check that the response contains the expected information
	# assert "Mercedes Sosa" in response, "Response should mention Mercedes Sosa"
	# assert "studio albums" in response.lower(), "Response should mention studio albums"
	# assert "2000" in response and "2009" in response, "Response should mention the year range"
	#
	# # Verify that a number is mentioned (either as word or digit)
	# import re
	# number_pattern = r'\b(one|two|three|four|five|six|seven|eight|nine|ten|\d+)\b'
	# has_number = bool(re.search(number_pattern, response.lower()))
	# assert has_number, "Response should include the number of albums"
	#
	# # Check for album names in the response
	# known_albums = [
	# "Corazón Libre",
	# "Cantora",
	# "Hermano",
	# "Acústico",
	# "Argentina quiere cantar"
	# ]
	# found_albums = [album for album in known_albums if album in response]
	# assert len(found_albums) > 0, "Response should mention at least some of the known albums"
	#
	# # Check for a structured response
	# assert re.search(r'\d+\.\s+[^(]+\(\d{4}\)', response), \
	# "Response should list albums with years"


	class TestBasicCodeAgentCapabilities:
	    """End-to-end checks of the code agent's answers and its recorded step logs."""

	    # Every logged step must carry at least one of these keys.
	    _STEP_CONTENT_KEYS = ("thought", "code", "observation")

	    def setup_method(self):
	        """Store a newly built AgentRunner on ``self.agent`` for the test to use."""
	        test_logger.info("Creating AgentRunner instance")
	        self.agent = AgentRunner()

	    @classmethod
	    def _check_step_fields(cls, steps):
	        """Assert every step has ``step_number`` and a thought/code/observation key."""
	        for entry in steps:
	            assert "step_number" in entry, "Each step should have a step_number"
	            has_content = any(key in entry for key in cls._STEP_CONTENT_KEYS)
	            assert (
	                has_content
	            ), "Each step should have at least one of: thought, code, or observation"

	    def test_simple_math_calculation_with_steps(self):
	        """Ask for 5 + 3 + 1294.678 and check the answer text and the step logs."""
	        question = "What is the result of the following operation: 5 + 3 + 1294.678?"
	        test_logger.info(f"Testing math calculation with question: {question}")

	        response = self.agent(question)

	        # The stringified sum has to appear verbatim in the response
	        expected_result = str(5 + 3 + 1294.678)
	        answer_present = expected_result in response
	        assert answer_present, f"Response should contain the result {expected_result}"

	        # The runner must expose a state holding a non-empty list of step logs
	        last_state = self.agent.last_state
	        assert last_state is not None, "Agent should store last state"
	        assert "step_logs" in last_state, "State should contain step_logs"
	        step_logs = last_state["step_logs"]
	        assert len(step_logs) > 0, "Should have at least one step logged"

	        self._check_step_fields(step_logs)

	        # The response text must contain the "final_answer" marker
	        marker_present = "final_answer" in response.lower()
	        assert marker_present, "Response should indicate it's providing an answer"

	    def test_document_qa_and_image_generation_with_steps(self):
	        """Ask for a Mona Lisa search plus image and check response and step logs."""
	        question = (
	            "Search for information about the Mona Lisa and generate an image of it."
	        )
	        test_logger.info(
	            f"Testing document QA and image generation with question: {question}"
	        )

	        response = self.agent(question)

	        # Both topics must be mentioned in the response (case-insensitive)
	        answer_text = response.lower()
	        assert "mona lisa" in answer_text, "Response should mention Mona Lisa"
	        assert "image" in answer_text, "Response should mention image generation"

	        # More than one step must have been logged
	        last_state = self.agent.last_state
	        assert last_state is not None, "Agent should store last state"
	        assert "step_logs" in last_state, "State should contain step_logs"
	        steps = last_state["step_logs"]
	        assert len(steps) > 1, "Should have multiple steps logged"

	        def steps_mentioning(term):
	            # Steps whose string form contains ``term``, ignoring case
	            return [entry for entry in steps if term in str(entry).lower()]

	        search_steps = steps_mentioning("search")
	        image_steps = steps_mentioning("image")
	        assert search_steps, "Should have search steps"
	        assert image_steps, "Should have image generation steps"

	        self._check_step_fields(steps)


	def test_simple_math_calculation_with_steps():
	    """Run a simple addition through the agent and check step logs and result.

	    The numeric check reads the content of a ``\\boxed{...}`` group when the
	    response has one, otherwise the whole response, and passes if any number
	    found is within 0.001 of 1302.678.
	    """
	    import re

	    runner = AgentRunner()
	    response = runner(
	        "What is the result of the following operation: 5 + 3 + 1294.678?"
	    )

	    # A state with at least one step log must have been recorded
	    state = runner.last_state
	    assert state is not None, "Last state should be stored"
	    step_logs = state.get("step_logs", [])
	    assert len(step_logs) > 0, "Should have recorded step logs"

	    content_keys = ("thought", "code", "observation")
	    for entry in step_logs:
	        assert "step_number" in entry, "Each step should have a step number"
	        has_content = any(key in entry for key in content_keys)
	        assert (
	            has_content
	        ), "Each step should have at least one of thought/code/observation"

	    expected_result = 1302.678

	    # Prefer the LaTeX box content when present, else scan the full response
	    boxed = re.search(r"\\boxed{([^}]+)}", response)
	    haystack = boxed.group(1) if boxed else response
	    numbers = re.findall(r"\d+\.?\d*", haystack)

	    assert numbers, "Response should contain at least one number"

	    # One matching number is enough
	    has_correct_result = any(
	        abs(float(token) - expected_result) < 0.001 for token in numbers
	    )
	    assert (
	        has_correct_result
	    ), f"Response should contain the result {expected_result}, got {response}"

	    # The response text must contain the "final_answer" marker
	    marker_present = "final_answer" in response.lower()
	    assert marker_present, "Response should indicate it's using final_answer"


	def test_document_qa_and_image_generation_with_steps():
	    """Run a search-plus-image request through the agent and verify its steps.

	    Each step log is validated, and the thought/code text of the steps must
	    mention "search" at least once and "image" or "dalle" at least once.
	    """
	    runner = AgentRunner()
	    response = runner(
	        "Can you search for information about the Mona Lisa and generate an image inspired by it?"
	    )

	    # A state with at least one step log must have been recorded
	    state = runner.last_state
	    assert state is not None, "Last state should be stored"
	    step_logs = state.get("step_logs", [])
	    assert len(step_logs) > 0, "Should have recorded step logs"

	    content_keys = ("thought", "code", "observation")
	    has_search_step = False
	    has_image_step = False

	    for entry in step_logs:
	        assert "step_number" in entry, "Each step should have a step number"
	        has_content = any(key in entry for key in content_keys)
	        assert (
	            has_content
	        ), "Each step should have at least one of thought/code/observation"

	        # Only the thought and code fields are scanned for the keywords
	        text = "".join(str(entry.get(key, "")) for key in ("thought", "code")).lower()
	        has_search_step = has_search_step or "search" in text
	        has_image_step = has_image_step or "image" in text or "dalle" in text

	    assert has_search_step, "Should include a search step"
	    assert has_image_step, "Should include an image generation step"

	    # The response text must contain the "final_answer" marker
	    marker_present = "final_answer" in response.lower()
	    assert marker_present, "Response should indicate it's using final_answer"


	if __name__ == "__main__":
	    # Run this file's tests verbosely, without output capture, stopping at the
	    # first failure. pytest.main() returns the exit code instead of exiting,
	    # so raise SystemExit with it; otherwise the script would exit 0 even
	    # when tests fail.
	    raise SystemExit(pytest.main([__file__, "-s", "-v", "-x"]))