gaia-enhanced-agent / debug_audio_real_scenario.py
GAIA Agent Deployment
Deploy Complete Enhanced GAIA Agent with Phase 1-6 Improvements
9a6a4dc
#!/usr/bin/env python3
"""
Debug Real Audio Processing Scenario
This script tests with a real audio scenario to reproduce the actual
"[}]" and UUID artifacts that occur in GAIA evaluation.
"""
import os
import sys
import logging
import tempfile
import wave
import struct
from pathlib import Path
# Add the deployment-ready directory to Python path
sys.path.insert(0, str(Path(__file__).parent))
from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent
# Route INFO-and-above records to the root handler, each line stamped with
# time, logger name and severity; then grab this module's own logger.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
def create_real_wav_file():
    """Create a temporary WAV file containing a short test tone.

    The file is one second of mono, 16-bit, 44.1 kHz audio holding a
    square wave at 30% of full scale. The period is computed with integer
    division (44100 // 440 == 100 samples), so the tone is nominally A4.

    Returns:
        str: Path of the created ``.wav`` file. The file is NOT deleted
        automatically; the caller is responsible for removing it.

    Raises:
        Exception: Whatever ``wave``/the OS raises while writing; the
        partially written file is removed before re-raising.
    """
    sample_rate = 44100
    duration = 1.0  # seconds
    frequency = 440  # nominal pitch (A4)
    amplitude = int(32767 * 0.3)  # 30% of the 16-bit peak value

    period = sample_rate // frequency  # samples per cycle
    half_period = period // 2
    n_samples = int(sample_rate * duration)

    # Square wave: positive for the first half of each cycle, negative after.
    samples = [
        amplitude if i % period < half_period else -amplitude
        for i in range(n_samples)
    ]
    # Pack everything up front so the WAV writer is called once instead of
    # once per sample.
    frames = struct.pack(f'<{n_samples}h', *samples)

    # Reserve a unique path, then close the handle before wave reopens it.
    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
        wav_path = tmp.name

    try:
        with wave.open(wav_path, 'wb') as wav_file:
            wav_file.setnchannels(1)  # Mono
            wav_file.setsampwidth(2)  # 16-bit
            wav_file.setframerate(sample_rate)
            wav_file.writeframes(frames)
    except Exception:
        # Do not leave a half-written temp file behind.
        if os.path.exists(wav_path):
            os.unlink(wav_path)
        raise

    return wav_path
def test_tool_parameter_issue():
    """Exercise ``transcribe_audio`` with a string path and with a dict.

    Two calls are made against ``OpenSourceMultimodalTools.transcribe_audio``
    using a freshly generated WAV file:

    1. the path passed directly as a string;
    2. the path wrapped in ``{'file_path': ...}``, which is the call shape
       this script suspects AGNO of using.

    Each outcome is logged; no exception propagates to the caller. The
    temporary WAV file is removed on every exit path.
    """
    logger.info("🔧 Testing tool parameter validation issue...")
    test_wav_path = None
    try:
        # Imported lazily so that an import failure is reported by the
        # handler below instead of breaking the whole script at load time.
        from agents.mistral_multimodal_agent import OpenSourceMultimodalTools

        # Initialize multimodal tools
        multimodal = OpenSourceMultimodalTools()

        # Create real WAV file
        test_wav_path = create_real_wav_file()
        logger.info(f"📄 Created test WAV file: {test_wav_path}")

        # Test 1: Direct call with string (should work)
        logger.info("🧪 Test 1: Direct call with string parameter")
        try:
            result1 = multimodal.transcribe_audio(test_wav_path)
            logger.info(f"✅ Direct string call result: {repr(result1)}")
        except Exception as e:
            logger.error(f"❌ Direct string call failed: {e}")

        # Test 2: Call with dict (this is what AGNO is doing - should fail)
        logger.info("🧪 Test 2: Call with dict parameter (AGNO style)")
        try:
            result2 = multimodal.transcribe_audio({'file_path': test_wav_path})
            logger.info(f"✅ Dict call result: {repr(result2)}")
        except Exception as e:
            logger.error(f"❌ Dict call failed: {e}")
            logger.error("🚨 THIS IS THE ROOT CAUSE - AGNO passes dict, function expects string!")
    except Exception as e:
        logger.error(f"❌ Tool parameter test failed: {e}")
    finally:
        # Clean up the temp file even if something above bailed out early.
        if test_wav_path is not None:
            try:
                os.unlink(test_wav_path)
            except OSError as e:
                logger.warning(f"Could not remove temporary file {test_wav_path}: {e}")
def test_agno_tool_call_format():
    """Run the full agent on a WAV file and scan its answer for artifacts.

    A ``FixedGAIAAgent`` is asked "What is said in this audio file?" with a
    freshly generated WAV file attached. The final answer is logged and
    checked for the literal ``[}]`` sequence and for JSON-looking output
    (leading ``{`` or a ``"name"`` key). Failures are logged with a
    traceback; nothing is raised to the caller. The temporary WAV file is
    removed on every exit path.
    """
    logger.info("🤖 Testing AGNO tool call format...")

    # Create real WAV file
    test_wav_path = create_real_wav_file()
    try:
        # Initialize the agent
        agent = FixedGAIAAgent()
        if not agent.available:
            logger.error("❌ Agent not available")
            return

        # Test with a simple question that should trigger audio transcription
        test_question = "What is said in this audio file?"
        test_files = [test_wav_path]
        logger.info(f"🤔 Testing with real WAV file: {test_wav_path}")

        # Process - this will show us exactly how AGNO calls the tool
        result = agent(test_question, test_files)
        logger.info(f"🎯 Final result: '{result}'")

        # The checks below are string operations; report a non-string
        # answer explicitly instead of failing with an opaque TypeError.
        if not isinstance(result, str):
            logger.error(f"❌ Agent returned {type(result).__name__}, expected str")
            return

        # Check for malformed patterns
        if "[}]" in result:
            logger.error("❌ FOUND '[}]' ARTIFACT!")
        if result.startswith('{') or '"name"' in result:
            logger.error("❌ FOUND JSON ARTIFACT!")
    except Exception as e:
        logger.error(f"❌ AGNO test failed: {e}")
        import traceback
        logger.error(f"📋 Traceback: {traceback.format_exc()}")
    finally:
        # Best-effort cleanup: only filesystem errors are tolerated here, so
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            os.unlink(test_wav_path)
        except OSError as e:
            logger.warning(f"Could not remove temporary file {test_wav_path}: {e}")
def main():
    """Run both debug scenarios in order, with a banner before each.

    First the direct tool-parameter check, then the end-to-end agent call.
    Both scenario functions log their own outcomes.
    """
    logger.info("🐛 GAIA Audio Processing Real Scenario Debug")
    logger.info("=" * 60)

    # Test 1: Tool parameter validation issue
    logger.info("\n🔧 TEST 1: Tool Parameter Validation")
    logger.info("-" * 40)
    test_tool_parameter_issue()

    # Test 2: AGNO tool call format
    logger.info("\n🤖 TEST 2: AGNO Tool Call Format")
    logger.info("-" * 40)
    test_agno_tool_call_format()
# Run the debug scenarios only when executed as a script, not on import.
if __name__ == "__main__":
    main()