Spaces:

JoachimVC
/

gaia-enhanced-agent

Running

File size: 5,745 Bytes

9a6a4dc

#!/usr/bin/env python3
"""
Debug Audio Processing Issue

This script reproduces the MP3 audio processing issue that causes
malformed responses with "[}]" and UUID artifacts in GAIA evaluation.
"""

import os
import sys
import logging
import tempfile
from pathlib import Path

# Put this script's own directory at the front of sys.path so the
# `agents` package imported below is resolved relative to this file
# (presumably the deployment-ready directory -- TODO confirm layout).
# This must run before the `agents` import that follows.
sys.path.insert(0, str(Path(__file__).parent))

from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

# Configure root logging at DEBUG level; each record is rendered as
# "<timestamp> - <logger name> - <level> - <message>".
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger shared by every function in this script.
logger = logging.getLogger(__name__)

def create_test_mp3_file():
    """Write a tiny placeholder MP3 to a temporary file and return its path.

    The payload is the four bytes ``FF FB 90 00`` (used here as a minimal
    MP3 header) followed by 100 zero bytes of padding; it contains no
    actual audio.

    Returns:
        The filesystem path of the new ``.mp3`` file. The file is created
        with ``delete=False``, so the caller is responsible for removing it.
    """
    frame_header = bytes((0xFF, 0xFB, 0x90, 0x00))
    padding = bytes(100)  # 100 zero bytes
    payload = frame_header + padding

    with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as handle:
        handle.write(payload)
        handle.flush()

    # The name stays available after the file object has been closed.
    return handle.name

def test_audio_processing_issue():
    """Run the full agent on a placeholder MP3 and scan the answer for artifacts.

    A temporary MP3 file is created, passed to a fresh ``FixedGAIAAgent``
    together with a fixed question, and the response is checked for three
    kinds of malformed output: the literal ``[}]`` marker, UUID-shaped
    substrings, and JSON / tool-call fragments. Findings are reported through
    the module logger only; nothing is raised to the caller.

    Returns:
        The agent's response when it is a string. ``None`` when the agent
        reports itself unavailable, when the response is not a string, or
        when any exception occurs during processing.
    """
    # Local imports keep this block self-contained; neither module is
    # imported at file level.
    import re
    import traceback

    logger.info("🐛 Starting audio processing debug test...")

    # Create test MP3 file
    test_mp3_path = create_test_mp3_file()
    logger.info(f"📄 Created test MP3 file: {test_mp3_path}")

    try:
        # Initialize the agent
        logger.info("🚀 Initializing FixedGAIAAgent...")
        agent = FixedGAIAAgent()

        if not agent.available:
            logger.error("❌ Agent not available - cannot test")
            return None

        # Test question with MP3 file
        test_question = "What is said in this audio file?"
        test_files = [test_mp3_path]

        logger.info(f"🤔 Testing question: {test_question}")
        logger.info(f"📎 With MP3 file: {test_mp3_path}")

        # Process the question - this should trigger the audio processing
        logger.info("🔄 Processing question with MP3 file...")
        result = agent(test_question, test_files)

        logger.info(f"📝 Raw result: {repr(result)}")
        logger.info(f"🎯 Final result: '{result}'")

        # The artifact checks below are string operations. Report a
        # non-string response explicitly instead of letting them fail with
        # an unrelated TypeError/AttributeError traceback.
        if not isinstance(result, str):
            logger.error(
                f"❌ Agent returned {type(result).__name__}, expected str - "
                "skipping artifact checks"
            )
            return None

        # Check for malformed response patterns
        if "[}]" in result:
            logger.error("❌ FOUND '[}]' ARTIFACT in response!")

        # Match the canonical 8-4-4-4-12 hexadecimal UUID layout. Testing
        # for "any digit" would flag nearly every answer containing a number.
        uuid_pattern = re.compile(
            r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}",
            re.IGNORECASE,
        )
        if uuid_pattern.search(result):
            logger.warning("⚠️ Potential UUID-like patterns detected in response")

        # Check if result looks like a tool call or JSON
        if result.startswith('{') or '"name"' in result or '"arguments"' in result:
            logger.error("❌ FOUND JSON/TOOL CALL ARTIFACT in response!")

        return result

    except Exception as e:
        # Top-level boundary of a debug script: log everything, never raise.
        logger.error(f"❌ Error during audio processing test: {e}")
        logger.error(f"📋 Traceback: {traceback.format_exc()}")
        return None

    finally:
        # Clean up test file (best effort; a failure here is only a warning)
        try:
            os.unlink(test_mp3_path)
            logger.info("🧹 Cleaned up test MP3 file")
        except OSError as e:
            logger.warning(f"⚠️ Failed to clean up test file: {e}")

def test_multimodal_tools_directly():
    """Call the multimodal transcription tool directly on a placeholder MP3.

    Bypasses the full agent so that artifacts can be attributed either to
    ``OpenSourceMultimodalTools.transcribe_audio`` or to the agent pipeline.
    The transcription is checked for the literal ``[}]`` marker and for
    JSON-like fragments; findings are reported through the module logger.

    Returns:
        The transcription when it is a string. ``None`` when the
        transcription is not a string or when any exception occurs
        (including a failed import of the multimodal tools).
    """
    import traceback

    logger.info("🔧 Testing multimodal tools directly...")

    try:
        from agents.mistral_multimodal_agent import OpenSourceMultimodalTools

        # Initialize multimodal tools
        multimodal = OpenSourceMultimodalTools()

        # Create test MP3 file
        test_mp3_path = create_test_mp3_file()
        try:
            # Test audio transcription directly
            logger.info("🎵 Testing audio transcription directly...")
            transcription = multimodal.transcribe_audio(test_mp3_path)
        finally:
            # Always remove the temp file, even when transcription raises;
            # otherwise every failed run leaks a file in the temp directory.
            try:
                os.unlink(test_mp3_path)
                logger.info("🧹 Cleaned up test MP3 file")
            except OSError as e:
                logger.warning(f"⚠️ Failed to clean up test file: {e}")

        logger.info(f"📝 Direct transcription result: {repr(transcription)}")

        # The artifact checks below are string operations; report a
        # non-string result explicitly instead of via an unrelated traceback.
        if not isinstance(transcription, str):
            logger.error(
                f"❌ Direct transcription returned {type(transcription).__name__}, "
                "expected str - skipping artifact checks"
            )
            return None

        # Check for artifacts
        if "[}]" in transcription:
            logger.error("❌ FOUND '[}]' ARTIFACT in direct transcription!")

        if transcription.startswith('{') or '"name"' in transcription:
            logger.error("❌ FOUND JSON ARTIFACT in direct transcription!")

        return transcription

    except Exception as e:
        # Top-level boundary of a debug script: log everything, never raise.
        logger.error(f"❌ Error testing multimodal tools directly: {e}")
        logger.error(f"📋 Traceback: {traceback.format_exc()}")
        return None

def main():
    """Run both debug tests and report where '[}]' artifacts show up.

    The direct multimodal-tools test runs first, then the full agent test.
    Both results are logged, followed by a one-line verdict naming the first
    stage (direct tools, then agent pipeline) whose result contains ``[}]``.
    """
    heavy_rule = "=" * 50
    light_rule = "-" * 40
    marker = "[}]"

    def contains_marker(text):
        # Empty or None results count as "no artifact".
        return bool(text) and marker in text

    logger.info("🐛 GAIA Audio Processing Debug Tool")
    logger.info(heavy_rule)

    # Stage 1: transcription tool on its own
    logger.info("\n🔧 TEST 1: Direct Multimodal Tools Test")
    logger.info(light_rule)
    direct_result = test_multimodal_tools_directly()

    # Stage 2: the complete agent pipeline
    logger.info("\n🤖 TEST 2: Full Agent Test")
    logger.info(light_rule)
    agent_result = test_audio_processing_issue()

    # Report both raw results
    logger.info("\n📊 DEBUG SUMMARY")
    logger.info(heavy_rule)
    logger.info(f"Direct multimodal result: {repr(direct_result)}")
    logger.info(f"Full agent result: {repr(agent_result)}")

    # Verdict: the direct-tools stage takes precedence over the agent stage
    if contains_marker(direct_result):
        logger.error("🚨 ISSUE FOUND: '[}]' artifacts in direct multimodal tools")
        return
    if contains_marker(agent_result):
        logger.error("🚨 ISSUE FOUND: '[}]' artifacts in agent processing pipeline")
        return
    logger.info("✅ No '[}]' artifacts detected in this test")

# Run the debug tests only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()