Spaces:
Running
Running
#!/usr/bin/env python3 | |
""" | |
Debug Real Audio Processing Scenario

Runs the audio tools and the GAIA agent against a real WAV file to reproduce
the "[}]" and UUID artifacts that occur in GAIA evaluation.
""" | |
import os | |
import sys | |
import logging | |
import tempfile | |
import wave | |
import struct | |
from pathlib import Path | |
# Add the deployment-ready directory to Python path | |
sys.path.insert(0, str(Path(__file__).parent)) | |
from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent | |
# Module-level logger used by every test helper in this script.
logger = logging.getLogger(__name__)

# Root logging setup: INFO and above, prefixed with timestamp, logger name
# and severity.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
def create_real_wav_file(sample_rate=44100, duration=1.0, frequency=440):
    """Write a short 16-bit mono test tone to a temporary WAV file.

    The waveform is a square wave (not a sine wave) at 30% of full scale:
    every period of ``sample_rate // frequency`` samples is positive for its
    first half and negative for the remainder.  Because the period is
    computed with integer division, the actual tone frequency is
    ``sample_rate / period`` (441 Hz with the defaults).  Called without
    arguments, the audio written is identical to the previous hard-coded
    behaviour: 44100 frames with a 100-sample period.

    Args:
        sample_rate: Frames per second stored in the WAV header.
        duration: Clip length in seconds.
        frequency: Nominal tone frequency in Hz; must not exceed
            ``sample_rate``.

    Returns:
        Path of the created ``.wav`` file.  The file is not deleted
        automatically; the caller must remove it.

    Raises:
        ValueError: If ``sample_rate`` or ``frequency`` is not positive,
            ``duration`` is negative, or ``frequency`` exceeds
            ``sample_rate``.
    """
    if sample_rate <= 0 or frequency <= 0 or duration < 0:
        raise ValueError(
            "sample_rate and frequency must be positive and duration non-negative"
        )
    period = sample_rate // frequency
    if period < 1:
        raise ValueError("frequency must not exceed sample_rate")
    half_period = period // 2

    # 30% of the 16-bit full-scale value keeps the tone comfortably quiet.
    amplitude = int(32767 * 0.3)
    frame_count = int(sample_rate * duration)
    samples = [
        amplitude if i % period < half_period else -amplitude
        for i in range(frame_count)
    ]
    # Pack every sample in one call so the WAV writer is invoked once
    # instead of once per frame.
    frames = struct.pack(f'<{frame_count}h', *samples)

    # Reserve a unique path, then close the handle before reopening the file
    # by name: reopening a still-open NamedTemporaryFile fails on Windows.
    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
        wav_path = tmp.name

    try:
        with wave.open(wav_path, 'w') as wav_file:
            wav_file.setnchannels(1)  # Mono
            wav_file.setsampwidth(2)  # 16-bit
            wav_file.setframerate(sample_rate)
            wav_file.writeframes(frames)
    except Exception:
        # Do not leave a half-written temp file behind on failure.
        os.unlink(wav_path)
        raise
    return wav_path
def test_tool_parameter_issue():
    """Check how ``transcribe_audio`` reacts to string vs. dict arguments.

    Creates a temporary WAV file and calls
    ``OpenSourceMultimodalTools.transcribe_audio`` twice: once with the file
    path as a plain string, and once with a ``{'file_path': ...}`` dict, the
    call style attributed to AGNO.  Outcomes are only logged; nothing is
    returned or asserted.  The temporary file is removed in a ``finally``
    clause so it is cleaned up regardless of how the checks end.
    """
    logger.info("π§ Testing tool parameter validation issue...")

    test_wav_path = None
    try:
        # Imported lazily so a missing or broken module is reported as a
        # test failure by the handler below.
        from agents.mistral_multimodal_agent import OpenSourceMultimodalTools

        # Initialize multimodal tools
        multimodal = OpenSourceMultimodalTools()

        # Create real WAV file
        test_wav_path = create_real_wav_file()
        logger.info(f"π Created test WAV file: {test_wav_path}")

        # Test 1: Direct call with string (should work)
        logger.info("π§ͺ Test 1: Direct call with string parameter")
        try:
            result1 = multimodal.transcribe_audio(test_wav_path)
            logger.info(f"β Direct string call result: {repr(result1)}")
        except Exception as e:
            logger.error(f"β Direct string call failed: {e}")

        # Test 2: Call with dict (this is what AGNO is doing - should fail)
        logger.info("π§ͺ Test 2: Call with dict parameter (AGNO style)")
        try:
            result2 = multimodal.transcribe_audio({'file_path': test_wav_path})
            logger.info(f"β Dict call result: {repr(result2)}")
        except Exception as e:
            logger.error(f"β Dict call failed: {e}")
            logger.error("π¨ THIS IS THE ROOT CAUSE - AGNO passes dict, function expects string!")
    except Exception as e:
        logger.error(f"β Tool parameter test failed: {e}")
    finally:
        # Clean up the temp file even when the checks above bail out early.
        if test_wav_path is not None:
            try:
                os.unlink(test_wav_path)
            except OSError as cleanup_error:
                logger.warning(
                    f"Could not remove temporary WAV file {test_wav_path}: {cleanup_error}"
                )
def test_agno_tool_call_format():
    """Run the agent on a real WAV file and scan its answer for artifacts.

    Creates a temporary WAV file, instantiates ``FixedGAIAAgent`` and asks it
    what is said in the audio.  The final answer is logged and checked for
    the literal ``[}]`` marker and for JSON-looking output (a leading ``{``
    or a ``"name"`` key).  Failures are logged with a traceback rather than
    raised.  The temporary file is always removed afterwards.
    """
    logger.info("π€ Testing AGNO tool call format...")

    # Create real WAV file
    test_wav_path = create_real_wav_file()

    try:
        # Initialize the agent
        agent = FixedGAIAAgent()
        if not agent.available:
            logger.error("β Agent not available")
            return

        # Test with a simple question that should trigger audio transcription
        test_question = "What is said in this audio file?"
        test_files = [test_wav_path]
        logger.info(f"π€ Testing with real WAV file: {test_wav_path}")

        # Process - this will show us exactly how AGNO calls the tool
        result = agent(test_question, test_files)
        logger.info(f"π― Final result: '{result}'")

        # Check for malformed patterns
        if "[}]" in result:
            logger.error("β FOUND '[}]' ARTIFACT!")
        if result.startswith('{') or '"name"' in result:
            logger.error("β FOUND JSON ARTIFACT!")
    except Exception as e:
        logger.error(f"β AGNO test failed: {e}")
        import traceback
        logger.error(f"π Traceback: {traceback.format_exc()}")
    finally:
        # Best-effort cleanup: only filesystem errors are tolerated, so
        # KeyboardInterrupt/SystemExit are no longer swallowed here.
        try:
            os.unlink(test_wav_path)
        except OSError as cleanup_error:
            logger.warning(
                f"Could not remove temporary WAV file {test_wav_path}: {cleanup_error}"
            )
def main():
    """Run both debug scenarios in order, logging a banner before each."""
    logger.info("π GAIA Audio Processing Real Scenario Debug")
    logger.info("=" * 60)

    # (heading, callable) pairs, executed in the listed order.
    scenarios = (
        ("\nπ§ TEST 1: Tool Parameter Validation", test_tool_parameter_issue),
        ("\nπ€ TEST 2: AGNO Tool Call Format", test_agno_tool_call_format),
    )
    for heading, run_scenario in scenarios:
        logger.info(heading)
        logger.info("-" * 40)
        run_scenario()
# Script entry point: run the debug scenarios only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()