#!/usr/bin/env python3
"""
Debug Audio Processing Issue

This script reproduces the MP3 audio processing issue that causes
malformed responses with "[}]" and UUID artifacts in GAIA evaluation.
"""
import os | |
import sys | |
import logging | |
import tempfile | |
from pathlib import Path | |
# Add the deployment-ready directory to Python path | |
sys.path.insert(0, str(Path(__file__).parent)) | |
from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent | |
# Configure logging: DEBUG threshold, with timestamp, logger name and level
# prefixed to every message.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger used by every function in this script.
logger = logging.getLogger(__name__)
def create_test_mp3_file():
    """Write a stub MP3 to a temporary file and return the file's path.

    The file holds the four bytes ``FF FB 90 00`` followed by 100 zero bytes;
    it contains no real audio. It is created with ``delete=False``, so the
    caller is responsible for removing it.

    Returns:
        Path of the newly created ``.mp3`` file, as a string.
    """
    # Header-like prefix plus zero padding -- just enough to have a payload.
    stub_bytes = b'\xff\xfb\x90\x00' + bytes(100)

    handle = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
    with handle:
        handle.write(stub_bytes)
        handle.flush()
    return handle.name
def test_audio_processing_issue():
    """Run the full agent on a stub MP3 and flag malformed-response artifacts.

    A temporary stub MP3 is created, ``FixedGAIAAgent`` is asked a question
    about it, and the answer is logged and scanned for the artifacts under
    investigation: a literal "[}]", a UUID-shaped token, and JSON / tool-call
    fragments. The temporary file is removed afterwards in every case.

    Returns:
        The agent's result, or ``None`` when the agent reports itself
        unavailable or an exception is raised while processing.
    """
    # Function-scope import, in the same way this script pulls in traceback.
    import re

    logger.info("π Starting audio processing debug test...")

    # Create test MP3 file
    test_mp3_path = create_test_mp3_file()
    logger.info(f"π Created test MP3 file: {test_mp3_path}")

    try:
        # Initialize the agent
        logger.info("π Initializing FixedGAIAAgent...")
        agent = FixedGAIAAgent()

        if not agent.available:
            logger.error("β Agent not available - cannot test")
            return

        # Test question with MP3 file
        test_question = "What is said in this audio file?"
        test_files = [test_mp3_path]

        logger.info(f"π€ Testing question: {test_question}")
        logger.info(f"π With MP3 file: {test_mp3_path}")

        # Process the question - this should trigger the audio processing
        logger.info("π Processing question with MP3 file...")
        result = agent(test_question, test_files)

        logger.info(f"π Raw result: {repr(result)}")
        logger.info(f"π― Final result: '{result}'")

        # Check for malformed response patterns
        if "[}]" in result:
            logger.error("β FOUND '[}]' ARTIFACT in response!")

        # Match the 8-4-4-4-12 hex layout of a UUID (hyphens optional) rather
        # than any digit, so ordinary numeric answers do not trigger the
        # warning. The lookarounds keep the match from starting or ending in
        # the middle of a longer hex run.
        uuid_like = re.compile(
            r"(?<![0-9a-f])"
            r"[0-9a-f]{8}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{12}"
            r"(?![0-9a-f])",
            re.IGNORECASE,
        )
        if uuid_like.search(result):
            logger.warning("β οΈ Potential UUID-like patterns detected in response")

        # Check if result looks like a tool call or JSON
        if result.startswith('{') or '"name"' in result or '"arguments"' in result:
            logger.error("β FOUND JSON/TOOL CALL ARTIFACT in response!")

        return result

    except Exception as e:
        # Top-level boundary of a debug script: log everything and carry on so
        # the caller can still print its summary.
        logger.error(f"β Error during audio processing test: {e}")
        import traceback
        logger.error(f"π Traceback: {traceback.format_exc()}")
        return None

    finally:
        # Clean up test file; a failure here must not mask the test outcome.
        try:
            os.unlink(test_mp3_path)
            logger.info("π§Ή Cleaned up test MP3 file")
        except Exception as e:
            logger.warning(f"β οΈ Failed to clean up test file: {e}")
def test_multimodal_tools_directly():
    """Call the multimodal transcription tool directly to isolate the issue.

    Imports ``OpenSourceMultimodalTools`` lazily, transcribes a temporary stub
    MP3 with it, and scans the transcription for a literal "[}]" and for JSON
    fragments. The temporary file is removed in a ``finally`` clause, so it is
    cleaned up even when the import, the transcription or a check raises.

    Returns:
        The transcription returned by ``transcribe_audio``, or ``None`` if any
        exception is raised along the way.
    """
    logger.info("π§ Testing multimodal tools directly...")

    # Stays None until the temp file exists, so cleanup knows whether to run.
    test_mp3_path = None

    try:
        from agents.mistral_multimodal_agent import OpenSourceMultimodalTools

        # Initialize multimodal tools
        multimodal = OpenSourceMultimodalTools()

        # Create test MP3 file
        test_mp3_path = create_test_mp3_file()

        # Test audio transcription directly
        logger.info("π΅ Testing audio transcription directly...")
        transcription = multimodal.transcribe_audio(test_mp3_path)

        logger.info(f"π Direct transcription result: {repr(transcription)}")

        # Check for artifacts
        if "[}]" in transcription:
            logger.error("β FOUND '[}]' ARTIFACT in direct transcription!")

        if transcription.startswith('{') or '"name"' in transcription:
            logger.error("β FOUND JSON ARTIFACT in direct transcription!")

        return transcription

    except Exception as e:
        # Top-level boundary of a debug script: log everything and carry on so
        # the caller can still print its summary.
        logger.error(f"β Error testing multimodal tools directly: {e}")
        import traceback
        logger.error(f"π Traceback: {traceback.format_exc()}")
        return None

    finally:
        # Clean up on every path; a cleanup failure is only worth a warning
        # and must not discard an otherwise valid transcription.
        if test_mp3_path is not None:
            try:
                os.unlink(test_mp3_path)
            except OSError as e:
                logger.warning(f"β οΈ Failed to clean up test file: {e}")
def main():
    """Run both debug tests and log where the "[}]" artifact shows up.

    The direct multimodal-tools test runs first, then the full agent test.
    Both results are logged, and the artifact is attributed to the direct
    tools if it appears there, otherwise to the agent pipeline if it appears
    in the agent's result.
    """
    heavy_rule = "=" * 50
    light_rule = "-" * 40

    logger.info("π GAIA Audio Processing Debug Tool")
    logger.info(heavy_rule)

    # Test 1: Direct multimodal tools test
    logger.info("\nπ§ TEST 1: Direct Multimodal Tools Test")
    logger.info(light_rule)
    direct_output = test_multimodal_tools_directly()

    # Test 2: Full agent test
    logger.info("\nπ€ TEST 2: Full Agent Test")
    logger.info(light_rule)
    agent_output = test_audio_processing_issue()

    # Summary
    logger.info("\nπ DEBUG SUMMARY")
    logger.info(heavy_rule)
    logger.info(f"Direct multimodal result: {repr(direct_output)}")
    logger.info(f"Full agent result: {repr(agent_output)}")

    # Analysis: blame the earliest stage that shows the artifact. The agent
    # result is only inspected when the direct result is clean.
    if direct_output and "[}]" in direct_output:
        logger.error("π¨ ISSUE FOUND: '[}]' artifacts in direct multimodal tools")
        return

    if agent_output and "[}]" in agent_output:
        logger.error("π¨ ISSUE FOUND: '[}]' artifacts in agent processing pipeline")
        return

    logger.info("β No '[}]' artifacts detected in this test")
# Run the debug tests only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()