# NOTE: the following is web file-viewer residue captured along with this
# script; it is not part of the program.
#   Spaces: Running
#   File size: 4,964 Bytes
#   9a6a4dc
#   (line-number gutter 1-147 omitted)
#!/usr/bin/env python3
"""
Debug Real Audio Processing Scenario
This script tests with a real audio scenario to reproduce the actual
"[}]" and UUID artifacts that occur in GAIA evaluation.
"""
import os
import sys
import logging
import tempfile
import wave
import struct
from pathlib import Path
# Add the deployment-ready directory to Python path
sys.path.insert(0, str(Path(__file__).parent))
from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent
# Logging setup: INFO threshold, records rendered as "time - logger - level - message"
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-level logger shared by the debug functions in this script
logger = logging.getLogger(__name__)
def create_real_wav_file(*, sample_rate=44100, duration=1.0, frequency=440):
    """Create a temporary WAV file holding a short test tone and return its path.

    The file is mono, 16-bit little-endian PCM. The tone is a square wave at
    30% of full scale (samples alternate between +9830 and -9830) whose period
    is ``sample_rate // frequency`` samples. With the defaults that is a
    100-sample period, i.e. 441 Hz: close to, but not exactly, A4.

    Args:
        sample_rate: Frames per second recorded in the WAV header.
        duration: Length of the tone in seconds.
        frequency: Approximate tone frequency in Hz. Must be positive and no
            greater than ``sample_rate``.

    Returns:
        The path of the created file as a string. The file is created with
        ``delete=False``, so the caller is responsible for removing it.

    Raises:
        ValueError: If ``frequency`` is not positive or exceeds
            ``sample_rate`` (the period would be zero samples).
    """
    if frequency <= 0 or frequency > sample_rate:
        raise ValueError(
            f"frequency must be in (0, sample_rate]; got {frequency!r} "
            f"with sample_rate={sample_rate!r}"
        )

    period = sample_rate // frequency
    half_period = period // 2
    # int() truncates toward zero, so the two levels are symmetric: +/-9830
    high = int(32767 * 0.3)
    low = -high

    # First half of every period is high, second half is low
    samples = [
        high if i % period < half_period else low
        for i in range(int(sample_rate * duration))
    ]
    # Pack everything up front so the WAV data is written with a single call
    # instead of one writeframes() call per sample
    frames = struct.pack(f'<{len(samples)}h', *samples)

    # Reserve a unique path, then close the handle before the wave module
    # reopens the file by name (reopening an open temp file is not portable)
    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
        wav_path = tmp.name

    with wave.open(wav_path, 'wb') as wav_file:
        wav_file.setnchannels(1)  # Mono
        wav_file.setsampwidth(2)  # 16-bit
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(frames)

    return wav_path
def test_tool_parameter_issue():
    """Compare calling ``transcribe_audio`` with a path string versus a dict.

    Generates a temporary WAV file, then calls
    ``OpenSourceMultimodalTools.transcribe_audio`` twice: once with the file
    path as a plain string, and once with ``{'file_path': path}``, which the
    log messages below describe as the way AGNO passes the argument. The
    outcome of each call is logged; nothing is returned.

    Every failure (including a failed import of the multimodal tools) is
    logged rather than raised, and the temporary file is removed on every
    exit path.
    """
    # NOTE(review): the leading symbols in most log messages below look like
    # mis-decoded emoji; they are reproduced as found - confirm against the
    # upstream file before normalizing them.
    logger.info("π§ Testing tool parameter validation issue...")
    test_wav_path = None
    try:
        from agents.mistral_multimodal_agent import OpenSourceMultimodalTools

        # Initialize multimodal tools
        multimodal = OpenSourceMultimodalTools()

        # Create real WAV file
        test_wav_path = create_real_wav_file()
        logger.info(f"π Created test WAV file: {test_wav_path}")

        # Test 1: Direct call with string (should work)
        logger.info("π§ͺ Test 1: Direct call with string parameter")
        try:
            result1 = multimodal.transcribe_audio(test_wav_path)
            logger.info(f"✅ Direct string call result: {repr(result1)}")
        except Exception as e:
            logger.error(f"β Direct string call failed: {e}")

        # Test 2: Call with dict (this is what AGNO is doing - should fail)
        logger.info("π§ͺ Test 2: Call with dict parameter (AGNO style)")
        try:
            result2 = multimodal.transcribe_audio({'file_path': test_wav_path})
            logger.info(f"✅ Dict call result: {repr(result2)}")
        except Exception as e:
            logger.error(f"β Dict call failed: {e}")
            logger.error("π¨ THIS IS THE ROOT CAUSE - AGNO passes dict, function expects string!")
    except Exception as e:
        logger.error(f"β Tool parameter test failed: {e}")
    finally:
        # Clean up on every exit path, but only if the file was actually created
        if test_wav_path is not None:
            try:
                os.unlink(test_wav_path)
            except OSError as e:
                logger.warning(f"Could not remove temporary file {test_wav_path}: {e}")
def test_agno_tool_call_format():
    """Run the agent on a generated WAV file and scan its answer for artifacts.

    Creates a temporary WAV file, instantiates ``FixedGAIAAgent`` and, if the
    agent reports itself available, calls it with a fixed question and the
    WAV path. The returned answer is logged and checked for two malformed
    patterns: the literal ``[}]`` and JSON-looking output (a leading ``{`` or
    a ``"name"`` key).

    Any exception raised while running the agent is logged together with its
    traceback instead of propagating. The temporary file is removed on every
    exit path. Nothing is returned.
    """
    logger.info("π€ Testing AGNO tool call format...")

    # Create real WAV file
    test_wav_path = create_real_wav_file()
    try:
        # Initialize the agent
        agent = FixedGAIAAgent()
        if not agent.available:
            logger.error("β Agent not available")
            return

        # Test with a simple question that should trigger audio transcription
        test_question = "What is said in this audio file?"
        test_files = [test_wav_path]
        logger.info(f"π€ Testing with real WAV file: {test_wav_path}")

        # Run the agent; this call is meant to expose how AGNO invokes the tool
        result = agent(test_question, test_files)
        logger.info(f"π― Final result: '{result}'")

        # Check for malformed patterns
        if "[}]" in result:
            logger.error("β FOUND '[}]' ARTIFACT!")
        if result.startswith('{') or '"name"' in result:
            logger.error("β FOUND JSON ARTIFACT!")
    except Exception as e:
        logger.error(f"β AGNO test failed: {e}")
        import traceback
        logger.error(f"π Traceback: {traceback.format_exc()}")
    finally:
        # Best-effort cleanup: ignore filesystem errors only, so that
        # KeyboardInterrupt / SystemExit are no longer swallowed here
        try:
            os.unlink(test_wav_path)
        except OSError:
            pass
def main():
    """Run both debug scenarios in order, logging a banner before each one."""
    logger.info("π GAIA Audio Processing Real Scenario Debug")
    logger.info("=" * 60)

    # (heading, scenario) pairs, executed in the order listed
    scenarios = (
        ("\nπ§ TEST 1: Tool Parameter Validation", test_tool_parameter_issue),
        ("\nπ€ TEST 2: AGNO Tool Call Format", test_agno_tool_call_format),
    )
    for heading, run_scenario in scenarios:
        logger.info(heading)
        logger.info("-" * 40)
        run_scenario()
if __name__ == "__main__":
    # Run the debug scenarios only when executed as a script, not on import
    main()