gaia-enhanced-agent / debug_audio_processing.py
GAIA Agent Deployment
Deploy Complete Enhanced GAIA Agent with Phase 1-6 Improvements
9a6a4dc
#!/usr/bin/env python3
"""
Debug Audio Processing Issue
This script reproduces the MP3 audio processing issue that causes
malformed responses with "[}]" and UUID artifacts in GAIA evaluation.
"""
import os
import sys
import logging
import tempfile
from pathlib import Path
# Add the deployment-ready directory to Python path
sys.path.insert(0, str(Path(__file__).parent))
from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent
# Logging setup: show everything from DEBUG upward, and prefix each record
# with its timestamp, the emitting logger's name and the severity level.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)
# Module-level logger shared by every function in this script.
logger = logging.getLogger(__name__)
def create_test_mp3_file():
    """Write a tiny stand-in MP3 to a temporary file and return its path.

    The file holds only the four header bytes ``FF FB 90 00`` followed by
    100 zero bytes; it carries no real audio. The temporary file is created
    with ``delete=False``, so it stays on disk after this function returns
    and the caller has to remove it.

    Returns:
        str: Filesystem path of the newly created ``.mp3`` file.
    """
    header_bytes = b'\xff\xfb\x90\x00'
    payload = header_bytes + bytes(100)  # header + 100 zero bytes of padding

    # Leaving the ``with`` block closes the handle, which flushes the payload
    # to disk before the path is handed back.
    with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as handle:
        handle.write(payload)
    return handle.name
def test_audio_processing_issue():
    """Run the full agent on a stub MP3 and log malformed-response artifacts.

    Creates a temporary MP3 with ``create_test_mp3_file``, instantiates
    ``FixedGAIAAgent`` and calls it with the question
    "What is said in this audio file?" plus the MP3 path. The answer is then
    checked for three kinds of artifact, each of which is only logged:

    * the literal fragment ``[}]``,
    * a UUID-shaped token (8-4-4-4-12 hexadecimal groups),
    * text that looks like a JSON object or tool call.

    The temporary file is removed in a ``finally`` clause on every path.

    Returns:
        The agent's result, or ``None`` if the agent reports it is not
        available or if any exception is raised while running the test.
    """
    import re  # only needed for the UUID check below

    logger.info("πŸ› Starting audio processing debug test...")
    # Create test MP3 file
    test_mp3_path = create_test_mp3_file()
    logger.info(f"πŸ“„ Created test MP3 file: {test_mp3_path}")
    try:
        # Initialize the agent
        logger.info("πŸš€ Initializing FixedGAIAAgent...")
        agent = FixedGAIAAgent()
        if not agent.available:
            logger.error("❌ Agent not available - cannot test")
            return
        # Test question with MP3 file
        test_question = "What is said in this audio file?"
        test_files = [test_mp3_path]
        logger.info(f"πŸ€” Testing question: {test_question}")
        logger.info(f"πŸ“Ž With MP3 file: {test_mp3_path}")
        # Process the question - this should trigger the audio processing
        logger.info("πŸ”„ Processing question with MP3 file...")
        result = agent(test_question, test_files)
        logger.info(f"πŸ“ Raw result: {repr(result)}")
        logger.info(f"🎯 Final result: '{result}'")
        # Check for malformed response patterns
        if "[}]" in result:
            logger.error("❌ FOUND '[}]' ARTIFACT in response!")
        # Match an actual UUID shape. The previous per-character test was true
        # for any response that merely contained a digit, so the warning fired
        # on almost every answer and carried no information.
        uuid_shape = (
            r"\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b"
        )
        if re.search(uuid_shape, result, re.IGNORECASE):
            logger.warning("⚠️ Potential UUID-like patterns detected in response")
        # Check if result looks like a tool call or JSON
        if result.startswith('{') or '"name"' in result or '"arguments"' in result:
            logger.error("❌ FOUND JSON/TOOL CALL ARTIFACT in response!")
        return result
    except Exception as e:
        # Top-level boundary of a debug script: report the failure in full
        # and hand None back so the summary in the caller can still run.
        logger.error(f"❌ Error during audio processing test: {e}")
        import traceback
        logger.error(f"πŸ“‹ Traceback: {traceback.format_exc()}")
        return None
    finally:
        # Clean up test file
        try:
            os.unlink(test_mp3_path)
            logger.info("🧹 Cleaned up test MP3 file")
        except Exception as e:
            logger.warning(f"⚠️ Failed to clean up test file: {e}")
def test_multimodal_tools_directly():
    """Call the multimodal transcription tool directly on a stub MP3.

    Imports and instantiates ``OpenSourceMultimodalTools``, creates a
    temporary MP3 with ``create_test_mp3_file`` and passes its path to
    ``transcribe_audio``. The transcription is checked for the ``[}]``
    fragment and for JSON-looking text; findings are only logged.

    The temporary file is removed in a ``finally`` clause, so it is deleted
    even when transcription or the artifact checks raise. A failure to delete
    it is logged as a warning and does not discard the transcription.

    Returns:
        The value returned by ``transcribe_audio``, or ``None`` if importing,
        initializing or running the tool raised an exception.
    """
    logger.info("πŸ”§ Testing multimodal tools directly...")
    try:
        from agents.mistral_multimodal_agent import OpenSourceMultimodalTools
        # Initialize multimodal tools
        multimodal = OpenSourceMultimodalTools()
        # Create test MP3 file
        test_mp3_path = create_test_mp3_file()
        try:
            # Test audio transcription directly
            logger.info("🎡 Testing audio transcription directly...")
            transcription = multimodal.transcribe_audio(test_mp3_path)
            logger.info(f"πŸ“ Direct transcription result: {repr(transcription)}")
            # Check for artifacts
            if "[}]" in transcription:
                logger.error("❌ FOUND '[}]' ARTIFACT in direct transcription!")
            if transcription.startswith('{') or '"name"' in transcription:
                logger.error("❌ FOUND JSON ARTIFACT in direct transcription!")
            return transcription
        finally:
            # Always remove the temp file; previously it was only deleted on
            # the success path and leaked whenever transcription raised.
            try:
                os.unlink(test_mp3_path)
            except OSError as cleanup_error:
                logger.warning(f"Failed to clean up test file: {cleanup_error}")
    except Exception as e:
        # Top-level boundary of a debug script: report the failure in full
        # and hand None back so the summary in the caller can still run.
        logger.error(f"❌ Error testing multimodal tools directly: {e}")
        import traceback
        logger.error(f"πŸ“‹ Traceback: {traceback.format_exc()}")
        return None
def main():
    """Run both debug tests and log a summary of where ``[}]`` shows up.

    Runs ``test_multimodal_tools_directly`` first and then
    ``test_audio_processing_issue``, logs both raw results, and reports:

    * an issue in the direct multimodal tools if their result contains
      ``[}]``;
    * otherwise an issue in the agent pipeline if the agent result contains
      ``[}]``;
    * otherwise an inconclusive run if either test returned ``None`` (both
      tests return ``None`` when they fail or cannot run);
    * otherwise that no ``[}]`` artifacts were detected.
    """
    logger.info("πŸ› GAIA Audio Processing Debug Tool")
    logger.info("=" * 50)
    # Test 1: Direct multimodal tools test
    logger.info("\nπŸ”§ TEST 1: Direct Multimodal Tools Test")
    logger.info("-" * 40)
    direct_result = test_multimodal_tools_directly()
    # Test 2: Full agent test
    logger.info("\nπŸ€– TEST 2: Full Agent Test")
    logger.info("-" * 40)
    agent_result = test_audio_processing_issue()
    # Summary
    logger.info("\nπŸ“Š DEBUG SUMMARY")
    logger.info("=" * 50)
    logger.info(f"Direct multimodal result: {repr(direct_result)}")
    logger.info(f"Full agent result: {repr(agent_result)}")
    # Analysis
    if direct_result and "[}]" in direct_result:
        logger.error("🚨 ISSUE FOUND: '[}]' artifacts in direct multimodal tools")
    elif agent_result and "[}]" in agent_result:
        logger.error("🚨 ISSUE FOUND: '[}]' artifacts in agent processing pipeline")
    elif direct_result is None or agent_result is None:
        # A missing result means that test did not complete, so the absence
        # of artifacts proves nothing; do not report the run as clean.
        missing = [
            label
            for label, value in (
                ("direct multimodal test", direct_result),
                ("full agent test", agent_result),
            )
            if value is None
        ]
        logger.warning(
            f"Inconclusive: no result from {' and '.join(missing)}; "
            "see the errors logged above"
        )
    else:
        logger.info("βœ… No '[}]' artifacts detected in this test")
# Script entry point: run the debug session only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()