# File size: 5,745 Bytes
# 9a6a4dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#!/usr/bin/env python3
"""
Debug Audio Processing Issue

This script reproduces the MP3 audio processing issue that causes
malformed responses with "[}]" and UUID artifacts in GAIA evaluation.
"""

import os
import sys
import logging
import tempfile
from pathlib import Path

# Add the deployment-ready directory to Python path
sys.path.insert(0, str(Path(__file__).parent))

from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

# Configure logging
# Emit everything from DEBUG upward, stamped with time, logger name and level,
# so each step of the debug run can be traced in the output.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.DEBUG,
)

# Module-level logger shared by every function in this script.
logger = logging.getLogger(__name__)

def create_test_mp3_file():
    """Write a tiny placeholder ``.mp3`` file and return its path.

    The file holds a 4-byte header followed by 100 zero bytes; it carries no
    real audio. It is created with ``delete=False``, so the caller is
    responsible for removing it.

    Returns:
        str: Filesystem path of the newly created temporary file.
    """
    header_bytes = b'\xff\xfb\x90\x00'  # minimal MP3 header
    zero_padding = b'\x00' * 100
    payload = header_bytes + zero_padding

    handle = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
    with handle:
        handle.write(payload)
        handle.flush()
        mp3_path = handle.name
    return mp3_path

def test_audio_processing_issue():
    """Run the full agent on a placeholder MP3 and scan the answer for artifacts.

    A temporary MP3 is created with ``create_test_mp3_file``, a fixed question
    is sent to ``FixedGAIAAgent`` together with that file, and the answer is
    logged and checked for three patterns: the literal ``[}]``, UUID-shaped
    substrings, and JSON / tool-call fragments.

    Returns:
        The agent's answer, or ``None`` when the agent reports itself
        unavailable or an exception is raised while running the test.
        Removal of the temporary file is attempted in every case.
    """
    import re  # local import: only the UUID check below needs it

    # Canonical 8-4-4-4-12 hexadecimal UUID layout. The previous check fired on
    # any single decimal digit in the answer, which is not a UUID signal.
    uuid_pattern = re.compile(
        r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}",
        re.IGNORECASE,
    )

    logger.info("πŸ› Starting audio processing debug test...")

    # Create test MP3 file
    test_mp3_path = create_test_mp3_file()
    logger.info(f"πŸ“„ Created test MP3 file: {test_mp3_path}")

    try:
        # Initialize the agent
        logger.info("πŸš€ Initializing FixedGAIAAgent...")
        agent = FixedGAIAAgent()

        if not agent.available:
            logger.error("❌ Agent not available - cannot test")
            return

        # Test question with MP3 file
        test_question = "What is said in this audio file?"
        test_files = [test_mp3_path]

        logger.info(f"πŸ€” Testing question: {test_question}")
        logger.info(f"πŸ“Ž With MP3 file: {test_mp3_path}")

        # Process the question - this should trigger the audio processing
        logger.info("πŸ”„ Processing question with MP3 file...")
        result = agent(test_question, test_files)

        logger.info(f"πŸ“ Raw result: {repr(result)}")
        logger.info(f"🎯 Final result: '{result}'")

        # Check for malformed response patterns
        if "[}]" in result:
            logger.error("❌ FOUND '[}]' ARTIFACT in response!")

        # Flag only genuinely UUID-shaped substrings
        if uuid_pattern.search(result):
            logger.warning("⚠️ Potential UUID-like patterns detected in response")

        # Check if result looks like a tool call or JSON
        if result.startswith('{') or '"name"' in result or '"arguments"' in result:
            logger.error("❌ FOUND JSON/TOOL CALL ARTIFACT in response!")

        return result

    except Exception as e:
        # Top-level boundary of a debug script: log everything, never crash.
        logger.error(f"❌ Error during audio processing test: {e}")
        import traceback
        logger.error(f"πŸ“‹ Traceback: {traceback.format_exc()}")
        return None

    finally:
        # Clean up test file (best effort; a failure here is only a warning)
        try:
            os.unlink(test_mp3_path)
            logger.info("🧹 Cleaned up test MP3 file")
        except Exception as e:
            logger.warning(f"⚠️ Failed to clean up test file: {e}")

def test_multimodal_tools_directly():
    """Call the multimodal transcription tool directly, bypassing the agent.

    Imports ``OpenSourceMultimodalTools`` lazily, transcribes a placeholder MP3
    produced by ``create_test_mp3_file`` and checks the transcription for the
    ``[}]`` artifact and for JSON fragments.

    Returns:
        The transcription returned by ``transcribe_audio``, or ``None`` when
        the import, the transcription or one of the checks raises.
        The temporary file is removed on every path, including failures; a
        failed removal is logged as a warning and does not discard the result.
    """
    logger.info("πŸ”§ Testing multimodal tools directly...")

    # Stays None until the temp file exists, so cleanup knows whether to act.
    test_mp3_path = None

    try:
        from agents.mistral_multimodal_agent import OpenSourceMultimodalTools

        # Initialize multimodal tools
        multimodal = OpenSourceMultimodalTools()

        # Create test MP3 file
        test_mp3_path = create_test_mp3_file()

        # Test audio transcription directly
        logger.info("🎡 Testing audio transcription directly...")
        transcription = multimodal.transcribe_audio(test_mp3_path)

        logger.info(f"πŸ“ Direct transcription result: {repr(transcription)}")

        # Check for artifacts
        if "[}]" in transcription:
            logger.error("❌ FOUND '[}]' ARTIFACT in direct transcription!")

        if transcription.startswith('{') or '"name"' in transcription:
            logger.error("❌ FOUND JSON ARTIFACT in direct transcription!")

        return transcription

    except Exception as e:
        # Top-level boundary of a debug script: log everything, never crash.
        logger.error(f"❌ Error testing multimodal tools directly: {e}")
        import traceback
        logger.error(f"πŸ“‹ Traceback: {traceback.format_exc()}")
        return None

    finally:
        # Previously the file was only removed on the success path and leaked
        # whenever transcription or a check raised.
        if test_mp3_path is not None:
            try:
                os.unlink(test_mp3_path)
            except OSError as e:
                logger.warning(f"Failed to clean up test file: {e}")

def main():
    """Run both debug scenarios and report where a '[}]' artifact shows up.

    The multimodal tools are exercised directly first, then the full agent.
    Both results are logged, and the summary names the first stage whose
    result contains ``[}]`` (direct tools are checked before the agent).
    """
    heavy_rule = "=" * 50
    light_rule = "-" * 40

    logger.info("πŸ› GAIA Audio Processing Debug Tool")
    logger.info(heavy_rule)

    # Scenario 1: transcription tool on its own
    logger.info("\nπŸ”§ TEST 1: Direct Multimodal Tools Test")
    logger.info(light_rule)
    direct_output = test_multimodal_tools_directly()

    # Scenario 2: the complete agent pipeline
    logger.info("\nπŸ€– TEST 2: Full Agent Test")
    logger.info(light_rule)
    agent_output = test_audio_processing_issue()

    # Report what each scenario produced
    logger.info("\nπŸ“Š DEBUG SUMMARY")
    logger.info(heavy_rule)
    logger.info(f"Direct multimodal result: {direct_output!r}")
    logger.info(f"Full agent result: {agent_output!r}")

    # Attribute the artifact to the earliest stage that shows it
    if direct_output and "[}]" in direct_output:
        logger.error("🚨 ISSUE FOUND: '[}]' artifacts in direct multimodal tools")
        return

    if agent_output and "[}]" in agent_output:
        logger.error("🚨 ISSUE FOUND: '[}]' artifacts in agent processing pipeline")
        return

    logger.info("βœ… No '[}]' artifacts detected in this test")

# Run the debug workflow only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()