#!/usr/bin/env python3
"""
Debug Audio Processing Issue

This script reproduces the MP3 audio processing issue that causes
malformed responses with "[}]" and UUID artifacts in GAIA evaluation.

It runs two checks and logs what each one returns:

1. the multimodal tools' ``transcribe_audio`` called directly, and
2. the full ``FixedGAIAAgent`` called with a question plus an MP3 attachment.
"""

import logging
import os
import re
import sys
import tempfile
import traceback
from pathlib import Path

# Add this script's directory (the deployment-ready directory) to the Python
# path. This must happen before the `agents` import below.
sys.path.insert(0, str(Path(__file__).parent))

from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

# Configure logging
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Canonical 8-4-4-4-12 hexadecimal UUID, matched anywhere in a string.
_UUID_PATTERN = re.compile(
    r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}",
    re.IGNORECASE,
)


def _as_text(value):
    """Return *value* as a string suitable for artifact scanning.

    ``None`` becomes the empty string and any other non-string value is
    converted with ``str()``, so the substring checks below never raise
    ``TypeError`` on an unexpected return type.
    """
    if value is None:
        return ""
    return value if isinstance(value, str) else str(value)


def _remove_test_file(path):
    """Delete the temporary test file at *path*, logging instead of raising."""
    try:
        os.unlink(path)
        logger.info("🧹 Cleaned up test MP3 file")
    except OSError as e:
        logger.warning(f"⚠️ Failed to clean up test file: {e}")


def create_test_mp3_file() -> str:
    """Create a minimal test MP3 file for debugging.

    Returns:
        Path of the newly created temporary ``.mp3`` file. The file is not
        deleted automatically (``delete=False``); the caller must remove it.
    """
    # Create a minimal MP3 file (just headers, no actual audio)
    mp3_header = b'\xff\xfb\x90\x00' + b'\x00' * 100  # Minimal MP3 header + padding

    with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as tmp:
        tmp.write(mp3_header)
        tmp.flush()
        return tmp.name


def test_audio_processing_issue():
    """Test audio processing to identify the source of malformed responses.

    Creates a temporary MP3 file, calls ``FixedGAIAAgent`` with a question and
    that file, and scans the response for the "[}]" artifact, UUIDs, and
    JSON/tool-call fragments. The temporary file is always removed.

    Returns:
        Whatever the agent returned, or ``None`` if the agent reports itself
        unavailable or any exception is raised during the test.
    """
    logger.info("🐛 Starting audio processing debug test...")

    # Create test MP3 file
    test_mp3_path = create_test_mp3_file()
    logger.info(f"📄 Created test MP3 file: {test_mp3_path}")

    try:
        # Initialize the agent
        logger.info("🚀 Initializing FixedGAIAAgent...")
        agent = FixedGAIAAgent()

        if not agent.available:
            logger.error("❌ Agent not available - cannot test")
            return None

        # Test question with MP3 file
        test_question = "What is said in this audio file?"
        test_files = [test_mp3_path]

        logger.info(f"🤔 Testing question: {test_question}")
        logger.info(f"📎 With MP3 file: {test_mp3_path}")

        # Process the question - this should trigger the audio processing
        logger.info("🔄 Processing question with MP3 file...")
        result = agent(test_question, test_files)

        logger.info(f"📝 Raw result: {repr(result)}")
        logger.info(f"🎯 Final result: '{result}'")

        # Scan a text view of the result so a non-string return value is
        # still inspected instead of raising TypeError and being discarded.
        response_text = _as_text(result)

        # Check for malformed response patterns
        if "[}]" in response_text:
            logger.error("❌ FOUND '[}]' ARTIFACT in response!")

        # Match real UUIDs only; a bare digit in the answer is not an artifact.
        if _UUID_PATTERN.search(response_text):
            logger.warning("⚠️ Potential UUID-like patterns detected in response")

        # Check if result looks like a tool call or JSON
        if (
            response_text.startswith('{')
            or '"name"' in response_text
            or '"arguments"' in response_text
        ):
            logger.error("❌ FOUND JSON/TOOL CALL ARTIFACT in response!")

        return result

    except Exception as e:
        # Top-level boundary of a debug script: report everything, never crash.
        logger.error(f"❌ Error during audio processing test: {e}")
        logger.error(f"📋 Traceback: {traceback.format_exc()}")
        return None

    finally:
        # Clean up test file
        _remove_test_file(test_mp3_path)


def test_multimodal_tools_directly():
    """Test the multimodal tools directly to isolate the issue.

    Imports ``OpenSourceMultimodalTools`` lazily, transcribes a temporary MP3
    file with ``transcribe_audio`` and scans the transcription for the "[}]"
    artifact and JSON fragments. The temporary file is removed even when the
    transcription or the checks raise.

    Returns:
        The value returned by ``transcribe_audio``, or ``None`` if the import,
        initialization, or transcription raised an exception.
    """
    logger.info("🔧 Testing multimodal tools directly...")

    try:
        from agents.mistral_multimodal_agent import OpenSourceMultimodalTools

        # Initialize multimodal tools
        multimodal = OpenSourceMultimodalTools()

        # Create test MP3 file
        test_mp3_path = create_test_mp3_file()

        try:
            # Test audio transcription directly
            logger.info("🎵 Testing audio transcription directly...")
            transcription = multimodal.transcribe_audio(test_mp3_path)

            logger.info(f"📝 Direct transcription result: {repr(transcription)}")

            transcription_text = _as_text(transcription)

            # Check for artifacts
            if "[}]" in transcription_text:
                logger.error("❌ FOUND '[}]' ARTIFACT in direct transcription!")

            if transcription_text.startswith('{') or '"name"' in transcription_text:
                logger.error("❌ FOUND JSON ARTIFACT in direct transcription!")

            return transcription

        finally:
            # Clean up on both success and failure so no temp file is leaked.
            _remove_test_file(test_mp3_path)

    except Exception as e:
        # Top-level boundary of a debug script: report everything, never crash.
        logger.error(f"❌ Error testing multimodal tools directly: {e}")
        logger.error(f"📋 Traceback: {traceback.format_exc()}")
        return None


def main() -> None:
    """Main debug function.

    Runs the direct multimodal test, then the full agent test, and logs a
    summary stating which stage (if any) produced the "[}]" artifact.
    """
    logger.info("🐛 GAIA Audio Processing Debug Tool")
    logger.info("=" * 50)

    # Test 1: Direct multimodal tools test
    logger.info("\n🔧 TEST 1: Direct Multimodal Tools Test")
    logger.info("-" * 40)
    direct_result = test_multimodal_tools_directly()

    # Test 2: Full agent test
    logger.info("\n🤖 TEST 2: Full Agent Test")
    logger.info("-" * 40)
    agent_result = test_audio_processing_issue()

    # Summary
    logger.info("\n📊 DEBUG SUMMARY")
    logger.info("=" * 50)
    logger.info(f"Direct multimodal result: {repr(direct_result)}")
    logger.info(f"Full agent result: {repr(agent_result)}")

    # Analysis: the direct tools are checked first because an artifact there
    # would also propagate into the agent pipeline.
    if "[}]" in _as_text(direct_result):
        logger.error("🚨 ISSUE FOUND: '[}]' artifacts in direct multimodal tools")
    elif "[}]" in _as_text(agent_result):
        logger.error("🚨 ISSUE FOUND: '[}]' artifacts in agent processing pipeline")
    else:
        logger.info("✅ No '[}]' artifacts detected in this test")


if __name__ == "__main__":
    main()