Spaces:

JoachimVC
/

gaia-enhanced-agent

Running

gaia-enhanced-agent / debug_audio_processing.py

GAIA Agent Deployment

Deploy Complete Enhanced GAIA Agent with Phase 1-6 Improvements

9a6a4dc 4 days ago

5.75 kB

	#!/usr/bin/env python3
	"""
	Debug Audio Processing Issue

	This script reproduces the MP3 audio processing issue that causes
	malformed responses with "[}]" and UUID artifacts in GAIA evaluation.
	"""

	import os
	import sys
	import logging
	import tempfile
	from pathlib import Path

	# Add the deployment-ready directory to Python path
	sys.path.insert(0, str(Path(__file__).parent))

	from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent

	# Route DEBUG-and-above records through the root logger; every line carries
	# a timestamp, the emitting logger's name and the severity level.
	logging.basicConfig(
	    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
	    level=logging.DEBUG,
	)
	# Module-scoped logger used by the functions below.
	logger = logging.getLogger(__name__)

	def create_test_mp3_file():
	    """Write a tiny placeholder MP3 to a temporary file and return its path.

	    The payload is a four-byte MP3 header followed by 100 zero bytes; it
	    holds no real audio. The file is created with ``delete=False``, so it
	    stays on disk after this function returns and the caller must remove it.

	    Returns:
	        The path of the newly created ``.mp3`` file, as reported by the
	        temporary-file object's ``name`` attribute.
	    """
	    frame_header = b'\xff\xfb\x90\x00'
	    zero_padding = bytes(100)

	    handle = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
	    with handle:
	        handle.write(frame_header + zero_padding)
	        handle.flush()
	        mp3_path = handle.name
	    return mp3_path

	def test_audio_processing_issue():
	    """Run the full agent on a placeholder MP3 and scan its answer for artifacts.

	    A throwaway MP3 is created and passed to ``FixedGAIAAgent`` together with
	    a fixed question. The returned text is then checked for three kinds of
	    malformed output: the literal ``[}]`` marker, UUID-shaped tokens, and
	    JSON / tool-call fragments. Findings are only logged; nothing is raised.
	    The temporary MP3 is removed in all cases.

	    Returns:
	        The agent's result, or ``None`` when the agent reports itself as
	        unavailable or any exception is raised during the test.
	    """
	    # Local imports: only this function needs them.
	    import re
	    import traceback

	    # Canonical 8-4-4-4-12 hexadecimal UUID layout. The previous per-character
	    # test fired on any response that merely contained a digit.
	    uuid_pattern = re.compile(
	        r'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}',
	        re.IGNORECASE,
	    )

	    logger.info("🐛 Starting audio processing debug test...")

	    # Create test MP3 file
	    test_mp3_path = create_test_mp3_file()
	    logger.info(f"📄 Created test MP3 file: {test_mp3_path}")

	    try:
	        # Initialize the agent
	        logger.info("🚀 Initializing FixedGAIAAgent...")
	        agent = FixedGAIAAgent()

	        if not agent.available:
	            logger.error("❌ Agent not available - cannot test")
	            return

	        # Test question with MP3 file
	        test_question = "What is said in this audio file?"
	        test_files = [test_mp3_path]

	        logger.info(f"🤔 Testing question: {test_question}")
	        logger.info(f"📎 With MP3 file: {test_mp3_path}")

	        # Process the question - this should trigger the audio processing
	        logger.info("🔄 Processing question with MP3 file...")
	        result = agent(test_question, test_files)

	        logger.info(f"📝 Raw result: {repr(result)}")
	        logger.info(f"🎯 Final result: '{result}'")

	        # Check for malformed response patterns
	        if "[}]" in result:
	            logger.error("❌ FOUND '[}]' ARTIFACT in response!")

	        # Only warn when an actual UUID-shaped token is present.
	        if uuid_pattern.search(result):
	            logger.warning("⚠️ Potential UUID-like patterns detected in response")

	        # Check if result looks like a tool call or JSON
	        if result.startswith('{') or '"name"' in result or '"arguments"' in result:
	            logger.error("❌ FOUND JSON/TOOL CALL ARTIFACT in response!")

	        return result

	    except Exception as e:
	        # Debug-script boundary: log everything and report failure as None.
	        logger.error(f"❌ Error during audio processing test: {e}")
	        logger.error(f"📋 Traceback: {traceback.format_exc()}")
	        return None

	    finally:
	        # Clean up test file; a failed removal is reported but not fatal.
	        try:
	            os.unlink(test_mp3_path)
	            logger.info("🧹 Cleaned up test MP3 file")
	        except Exception as e:
	            logger.warning(f"⚠️ Failed to clean up test file: {e}")

	def test_multimodal_tools_directly():
	    """Call the multimodal transcription tool directly, bypassing the agent.

	    ``OpenSourceMultimodalTools`` is imported and instantiated, a placeholder
	    MP3 is created, and ``transcribe_audio`` is called on it. The returned
	    text is checked for the ``[}]`` marker and for JSON fragments; findings
	    are only logged. The temporary MP3 is removed whether or not the
	    transcription succeeds.

	    Returns:
	        The transcription result, or ``None`` when any exception is raised
	        (including a failed import of the multimodal tools).
	    """
	    import traceback

	    logger.info("🔧 Testing multimodal tools directly...")

	    # Stays None until the file exists, so cleanup knows whether to run.
	    test_mp3_path = None
	    try:
	        from agents.mistral_multimodal_agent import OpenSourceMultimodalTools

	        # Initialize multimodal tools
	        multimodal = OpenSourceMultimodalTools()

	        # Create test MP3 file
	        test_mp3_path = create_test_mp3_file()

	        # Test audio transcription directly
	        logger.info("🎵 Testing audio transcription directly...")
	        transcription = multimodal.transcribe_audio(test_mp3_path)

	        logger.info(f"📝 Direct transcription result: {repr(transcription)}")

	        # Check for artifacts
	        if "[}]" in transcription:
	            logger.error("❌ FOUND '[}]' ARTIFACT in direct transcription!")

	        if transcription.startswith('{') or '"name"' in transcription:
	            logger.error("❌ FOUND JSON ARTIFACT in direct transcription!")

	        return transcription

	    except Exception as e:
	        # Debug-script boundary: log everything and report failure as None.
	        logger.error(f"❌ Error testing multimodal tools directly: {e}")
	        logger.error(f"📋 Traceback: {traceback.format_exc()}")
	        return None

	    finally:
	        # Remove the temp file on every path; previously it leaked whenever
	        # transcription or one of the checks raised.
	        if test_mp3_path is not None:
	            try:
	                os.unlink(test_mp3_path)
	            except OSError as e:
	                logger.warning(f"⚠️ Failed to clean up test file: {e}")

	def main():
	    """Run both debug scenarios and log where ``[}]`` artifacts show up.

	    The direct multimodal-tools test runs first, then the full agent test.
	    Both results are logged, followed by a verdict naming the first stage
	    whose result contains ``[}]``, or a message that none was detected.
	    """
	    heavy_rule = "=" * 50
	    light_rule = "-" * 40

	    def has_artifact(text):
	        # Falsy results (None, empty string) cannot contain the marker.
	        return bool(text) and "[}]" in text

	    logger.info("🐛 GAIA Audio Processing Debug Tool")
	    logger.info(heavy_rule)

	    # Each stage logs its banner and a divider, then runs its test, in order.
	    stages = (
	        ("\n🔧 TEST 1: Direct Multimodal Tools Test", test_multimodal_tools_directly),
	        ("\n🤖 TEST 2: Full Agent Test", test_audio_processing_issue),
	    )
	    outcomes = []
	    for banner, run_stage in stages:
	        logger.info(banner)
	        logger.info(light_rule)
	        outcomes.append(run_stage())
	    direct_result, agent_result = outcomes

	    # Summary
	    logger.info("\n📊 DEBUG SUMMARY")
	    logger.info(heavy_rule)
	    logger.info(f"Direct multimodal result: {repr(direct_result)}")
	    logger.info(f"Full agent result: {repr(agent_result)}")

	    # Analysis: report only the earliest stage that shows the marker.
	    if has_artifact(direct_result):
	        logger.error("🚨 ISSUE FOUND: '[}]' artifacts in direct multimodal tools")
	        return
	    if has_artifact(agent_result):
	        logger.error("🚨 ISSUE FOUND: '[}]' artifacts in agent processing pipeline")
	        return
	    logger.info("✅ No '[}]' artifacts detected in this test")

	if __name__ == "__main__":
	    # Run the debug tool only when executed as a script, not on import.
	    main()