File size: 4,964 Bytes
9a6a4dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/usr/bin/env python3
"""
Debug Real Audio Processing Scenario

This script tests with a real audio scenario to reproduce the actual
"[}]" and UUID artifacts that occur in GAIA evaluation.
"""

import os
import sys
import logging
import tempfile
import wave
import struct
from pathlib import Path

# Put this script's own directory at the front of sys.path. This must happen
# before the `agents` import below so that package is looked up there first.
sys.path.insert(0, str(Path(__file__).parent))

from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

# Configure root logging at INFO with timestamp, logger name and level on
# every record, then create this module's logger.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

def create_real_wav_file():
    """Create a temporary WAV file containing one second of a test tone.

    The file is mono, 16-bit, 44.1 kHz. The tone is a square wave (not a
    sine wave): samples alternate between +30% and -30% of full scale with
    a period of ``sample_rate // frequency`` samples (100 samples, i.e.
    roughly 441 Hz).

    Returns:
        The path of the created ``.wav`` file as a string. The file is not
        deleted automatically; the caller is responsible for removing it.

    Raises:
        OSError / wave.Error: if the file cannot be created or written. The
            partially written file is removed before the error propagates.
    """
    sample_rate = 44100
    duration = 1.0  # seconds
    frequency = 440  # nominal pitch (A4); integer division below gives ~441 Hz

    amplitude = int(32767 * 0.3)  # 30% of the 16-bit full-scale value
    period = sample_rate // frequency
    half_period = period // 2

    # First half of every period is high, second half is low.
    samples = [
        amplitude if i % period < half_period else -amplitude
        for i in range(int(sample_rate * duration))
    ]
    # Pack everything at once so the WAV header is written a single time
    # instead of being patched after every individual sample.
    frames = struct.pack(f'<{len(samples)}h', *samples)

    # mkstemp + close: the wave module reopens the file by name, which fails
    # on Windows while another handle to the temp file is still open.
    fd, wav_path = tempfile.mkstemp(suffix='.wav')
    os.close(fd)
    try:
        with wave.open(wav_path, 'wb') as wav_file:
            wav_file.setnchannels(1)  # Mono
            wav_file.setsampwidth(2)  # 16-bit
            wav_file.setframerate(sample_rate)
            wav_file.writeframes(frames)
    except BaseException:
        # Do not leave a half-written temp file behind.
        try:
            os.unlink(wav_path)
        except OSError:
            pass
        raise

    return wav_path

def test_tool_parameter_issue():
    """Exercise ``transcribe_audio`` with a string and with a dict argument.

    Builds an ``OpenSourceMultimodalTools`` instance, creates a temporary WAV
    file and calls ``transcribe_audio`` twice:

    1. with the file path as a plain string;
    2. with ``{'file_path': <path>}``, the call shape this script attributes
       to AGNO.

    Each call logs either its result or the exception it raised. Nothing is
    returned and no ``Exception`` escapes: setup errors are logged as a
    failed tool parameter test. The temporary WAV file is always removed,
    even if a call is interrupted (e.g. by ``KeyboardInterrupt``).
    """
    logger.info("πŸ”§ Testing tool parameter validation issue...")

    try:
        # Imported lazily so an import failure is reported by this test
        # instead of preventing the whole script from loading.
        from agents.mistral_multimodal_agent import OpenSourceMultimodalTools

        # Initialize multimodal tools
        multimodal = OpenSourceMultimodalTools()

        # Create real WAV file
        test_wav_path = create_real_wav_file()
        try:
            logger.info(f"πŸ“„ Created test WAV file: {test_wav_path}")

            # Test 1: Direct call with string (should work)
            logger.info("πŸ§ͺ Test 1: Direct call with string parameter")
            try:
                result1 = multimodal.transcribe_audio(test_wav_path)
                logger.info(f"βœ… Direct string call result: {repr(result1)}")
            except Exception as e:
                logger.error(f"❌ Direct string call failed: {e}")

            # Test 2: Call with dict (the AGNO-style call - expected to fail)
            logger.info("πŸ§ͺ Test 2: Call with dict parameter (AGNO style)")
            try:
                result2 = multimodal.transcribe_audio({'file_path': test_wav_path})
                logger.info(f"βœ… Dict call result: {repr(result2)}")
            except Exception as e:
                logger.error(f"❌ Dict call failed: {e}")
                logger.error("🚨 THIS IS THE ROOT CAUSE - AGNO passes dict, function expects string!")
        finally:
            # Clean up in ``finally`` so the temp file is also removed when a
            # call is aborted by something ``except Exception`` does not
            # catch. A failing unlink is still reported by the handler below.
            os.unlink(test_wav_path)

    except Exception as e:
        logger.error(f"❌ Tool parameter test failed: {e}")

def test_agno_tool_call_format():
    """Run the full agent on a WAV file and scan its answer for artifacts.

    Creates a temporary WAV file, instantiates ``FixedGAIAAgent`` and, if the
    agent reports itself available, calls it with a fixed question and the
    WAV file path. The returned answer is logged and checked for two
    malformed-output patterns: the literal ``[}]`` and JSON-looking output
    (starts with ``{`` or contains ``"name"``).

    Nothing is returned. Any ``Exception`` raised while running the agent is
    logged together with its traceback. The temporary WAV file is removed in
    all cases; a failure to remove it is logged as a warning and does not
    raise.
    """
    logger.info("πŸ€– Testing AGNO tool call format...")

    # Create real WAV file
    test_wav_path = create_real_wav_file()

    try:
        # Initialize the agent
        agent = FixedGAIAAgent()

        if not agent.available:
            logger.error("❌ Agent not available")
            return

        # A simple question that should trigger audio transcription
        test_question = "What is said in this audio file?"
        test_files = [test_wav_path]

        logger.info(f"πŸ€” Testing with real WAV file: {test_wav_path}")

        # Process - this will show us exactly how AGNO calls the tool
        result = agent(test_question, test_files)

        logger.info(f"🎯 Final result: '{result}'")

        # Check for malformed patterns
        if "[}]" in result:
            logger.error("❌ FOUND '[}]' ARTIFACT!")
        if result.startswith('{') or '"name"' in result:
            logger.error("❌ FOUND JSON ARTIFACT!")

    except Exception as e:
        logger.error(f"❌ AGNO test failed: {e}")
        import traceback
        logger.error(f"πŸ“‹ Traceback: {traceback.format_exc()}")
    finally:
        # Best-effort cleanup. Only filesystem errors are tolerated here, so
        # KeyboardInterrupt / SystemExit are no longer silently swallowed.
        try:
            os.unlink(test_wav_path)
        except OSError as cleanup_error:
            logger.warning(
                "Could not remove temporary WAV file %s: %s",
                test_wav_path,
                cleanup_error,
            )

def main():
    """Run both debug scenarios in order, logging a banner before each."""
    logger.info("πŸ› GAIA Audio Processing Real Scenario Debug")
    logger.info("=" * 60)

    divider = "-" * 40

    # (heading, scenario) pairs, executed in the listed order.
    scenarios = (
        # Test 1: Tool parameter validation issue
        ("\nπŸ”§ TEST 1: Tool Parameter Validation", test_tool_parameter_issue),
        # Test 2: AGNO tool call format
        ("\nπŸ€– TEST 2: AGNO Tool Call Format", test_agno_tool_call_format),
    )

    for heading, run_scenario in scenarios:
        logger.info(heading)
        logger.info(divider)
        run_scenario()

# Run the debug scenarios only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()