Aedelon committed
Commit b8f6b7f · verified · 1 Parent(s): a23082c

agent enhancement (#3)

- Agent Improvement (6caec8d95ae9906aea5eb0e0465545da0450e97e)
- Agent Improvement (69c2791ac76a8b773bcb069d9453d775c101ae26)
Files changed (36)
  1. .env +3 -1
  2. __pycache__/app.cpython-311.pyc +0 -0
  3. agents/__pycache__/__init__.cpython-311.pyc +0 -0
  4. agents/__pycache__/advanced_validation_agent.cpython-311.pyc +0 -0
  5. agents/__pycache__/code_agent.cpython-311.pyc +0 -0
  6. agents/__pycache__/figure_interpretation_agent.cpython-311.pyc +0 -0
  7. agents/__pycache__/image_analyzer_agent.cpython-311.pyc +0 -0
  8. agents/__pycache__/long_context_management_agent.cpython-311.pyc +0 -0
  9. agents/__pycache__/math_agent.cpython-311.pyc +0 -0
  10. agents/__pycache__/planner_agent.cpython-311.pyc +0 -0
  11. agents/__pycache__/reasoning_agent.cpython-311.pyc +0 -0
  12. agents/__pycache__/research_agent.cpython-311.pyc +0 -0
  13. agents/__pycache__/role_agent.cpython-311.pyc +0 -0
  14. agents/__pycache__/text_analyzer_agent.cpython-311.pyc +0 -0
  15. agents/__pycache__/verifier_agent.cpython-311.pyc +0 -0
  16. agents/__pycache__/video_analyzer_agent.cpython-311.pyc +0 -0
  17. agents/advanced_validation_agent.py +0 -5
  18. agents/code_agent.py +37 -12
  19. agents/figure_interpretation_agent.py +0 -5
  20. agents/image_analyzer_agent.py +1 -5
  21. agents/long_context_management_agent.py +5 -5
  22. agents/math_agent.py +25 -8
  23. agents/planner_agent.py +76 -14
  24. agents/reasoning_agent.py +71 -11
  25. agents/research_agent.py +91 -156
  26. agents/role_agent.py +0 -3
  27. agents/text_analyzer_agent.py +1 -4
  28. agents/verifier_agent.py +1 -5
  29. agents/video_analyzer_agent.py +334 -0
  30. app.py +110 -93
  31. prompts/code_gen_prompt.txt +44 -3
  32. prompts/planner_agent_prompt.txt +31 -26
  33. prompts/reasoning_agent_prompt.txt +19 -9
  34. prompts/video_analyzer_prompt.txt +85 -0
  35. pyproject.toml +17 -1
  36. uv.lock +0 -0
.env CHANGED
@@ -6,11 +6,13 @@ GOOGLE_API_KEY="AIzaSyACcl4uzlyqz4glW-_uCj0xGPSSH0uloAY" # For Google Custom Search API
 GOOGLE_CSE_ID="004c6b8673f0c4dd5" # For Google Custom Search Engine ID
 TAVILY_API_KEY="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" # For Tavily Search API
 ALPAFLOW_OPENAI_API_KEY="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" # For o4-mini model (or other OpenAI compatible endpoint)
-WOLFRAM_ALPHA_APP_ID="YOUR_WOLFRAM_ALPHA_APP_ID" # For WolframAlpha API
+WOLFRAM_ALPHA_APP_ID="Y7YG2L-TEU4RGXRVG" # For WolframAlpha API
 
 # GAIA Benchmark API
 GAIA_API_URL="https://agents-course-unit4-scoring.hf.space"
 
+LLM_MODEL="models/gemini-1.5-pro"
+
 # Model Names (using defaults from original code, can be overridden)
 ROLE_EMBED_MODEL="Snowflake/snowflake-arctic-embed-l-v2.0"
 ROLE_RERANKER_MODEL="Alibaba-NLP/gte-multilingual-reranker-base"
__pycache__/app.cpython-311.pyc ADDED
Binary file (28 kB).
 
agents/__pycache__/__init__.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/__init__.cpython-311.pyc and b/agents/__pycache__/__init__.cpython-311.pyc differ
 
agents/__pycache__/advanced_validation_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/advanced_validation_agent.cpython-311.pyc and b/agents/__pycache__/advanced_validation_agent.cpython-311.pyc differ
 
agents/__pycache__/code_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/code_agent.cpython-311.pyc and b/agents/__pycache__/code_agent.cpython-311.pyc differ
 
agents/__pycache__/figure_interpretation_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/figure_interpretation_agent.cpython-311.pyc and b/agents/__pycache__/figure_interpretation_agent.cpython-311.pyc differ
 
agents/__pycache__/image_analyzer_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/image_analyzer_agent.cpython-311.pyc and b/agents/__pycache__/image_analyzer_agent.cpython-311.pyc differ
 
agents/__pycache__/long_context_management_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/long_context_management_agent.cpython-311.pyc and b/agents/__pycache__/long_context_management_agent.cpython-311.pyc differ
 
agents/__pycache__/math_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/math_agent.cpython-311.pyc and b/agents/__pycache__/math_agent.cpython-311.pyc differ
 
agents/__pycache__/planner_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/planner_agent.cpython-311.pyc and b/agents/__pycache__/planner_agent.cpython-311.pyc differ
 
agents/__pycache__/reasoning_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/reasoning_agent.cpython-311.pyc and b/agents/__pycache__/reasoning_agent.cpython-311.pyc differ
 
agents/__pycache__/research_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/research_agent.cpython-311.pyc and b/agents/__pycache__/research_agent.cpython-311.pyc differ
 
agents/__pycache__/role_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/role_agent.cpython-311.pyc and b/agents/__pycache__/role_agent.cpython-311.pyc differ
 
agents/__pycache__/text_analyzer_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/text_analyzer_agent.cpython-311.pyc and b/agents/__pycache__/text_analyzer_agent.cpython-311.pyc differ
 
agents/__pycache__/verifier_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/verifier_agent.cpython-311.pyc and b/agents/__pycache__/verifier_agent.cpython-311.pyc differ
 
agents/__pycache__/video_analyzer_agent.cpython-311.pyc ADDED
Binary file (17 kB).
 
agents/advanced_validation_agent.py CHANGED
@@ -2,16 +2,12 @@ import os
 import logging
 import json
 from typing import List, Dict, Optional, Union
-from dotenv import load_dotenv
 
 from llama_index.core.agent.workflow import ReActAgent
 from llama_index.core.tools import FunctionTool
 from llama_index.llms.google_genai import GoogleGenAI
 # Assuming research_agent might be needed for handoff, but not directly imported
 
-# Load environment variables
-load_dotenv()
-
 # Setup logging
 logger = logging.getLogger(__name__)
 
@@ -347,7 +343,6 @@ def initialize_advanced_validation_agent() -> ReActAgent:
         llm=llm,
         system_prompt=system_prompt,
         can_handoff_to=valid_handoffs,
-        verbose=True # Enable verbose logging
     )
     logger.info("AdvancedValidationAgent initialized successfully.")
     return agent
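
Note: this commit removes the per-module `load_dotenv()` call from every agent file (here and in the diffs below), which only works if the environment is populated once before the agent modules are used. A minimal sketch of that pattern, assuming the entry point (app.py, also changed in this commit but not shown in full here) takes over the loading:

    # Sketch only; the actual app.py changes are not visible in this diff.
    import os
    from dotenv import load_dotenv

    load_dotenv()  # populate os.environ from .env exactly once, at startup

    from agents.advanced_validation_agent import initialize_advanced_validation_agent

    agent = initialize_advanced_validation_agent()  # reads os.getenv(...) lazily
    print(os.getenv("LLM_MODEL", "models/gemini-1.5-pro"))  # default mirrors .env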
agents/code_agent.py CHANGED
@@ -1,6 +1,5 @@
 import os
 import logging
-from dotenv import load_dotenv
 
 from llama_index.core.agent.workflow import CodeActAgent, ReActAgent
 from llama_index.core.tools import FunctionTool
@@ -8,9 +7,6 @@ from llama_index.llms.google_genai import GoogleGenAI
 from llama_index.llms.openai import OpenAI
 from llama_index.tools.code_interpreter import CodeInterpreterToolSpec
 
-# Load environment variables
-load_dotenv()
-
 # Setup logging
 logger = logging.getLogger(__name__)
 
@@ -47,12 +43,10 @@ def generate_python_code(prompt: str) -> str:
 
     # Configuration for code generation LLM
     gen_llm_model = os.getenv("CODE_GEN_LLM_MODEL", "o4-mini")
-    gen_api_key_env = os.getenv("CODE_GEN_API_KEY_ENV", "ALPAFLOW_OPENAI_API_KEY")
-    gen_api_key = os.getenv(gen_api_key_env)
+    gen_api_key = os.getenv("OPENAI_API_KEY")
 
     if not gen_api_key:
-        logger.error(f"{gen_api_key_env} not found in environment variables for code generation LLM.")
-        raise ValueError(f"{gen_api_key_env} must be set for code generation")
+        raise ValueError("OPENAI_API_KEY environment variable is not set.")
 
     # Load the prompt template
     default_gen_prompt_template = ("You are a helpful assistant that writes Python code. "
@@ -68,7 +62,10 @@ def generate_python_code(prompt: str) -> str:
     try:
         llm = OpenAI(
             model=gen_llm_model,
-            api_key=gen_api_key
+            api_key=gen_api_key,
+            reasoning_effort="high",
+            temperature=0.25,
+            max_tokens=16384
         )
         logger.info(f"Using code generation LLM: {gen_llm_model}")
         generated_code = llm.complete(input_prompt)
@@ -145,14 +142,42 @@ def initialize_code_agent() -> ReActAgent:
     6. **Final Output**: Once the code works correctly and achieves the goal, output *only* the final functional code or the final execution result, as appropriate for the task.
     7. **Hand-Off**: If further logical reasoning or verification is needed, delegate to **reasoning_agent**. Otherwise, pass your final output to **planner_agent** for synthesis.
     """
-    # system_prompt = load_prompt_from_file("code_agent_system_prompt.txt", default_system_prompt)
-    system_prompt = default_system_prompt # Using inline for now
+    system_prompt = load_prompt_from_file("code_agent_system_prompt.txt", default_system_prompt)
 
     agent = ReActAgent(
         name="code_agent",
         description=(
             "Generates Python code using `python_code_generator` and executes it safely using `code_interpreter`. "
-            "Iteratively debugs and refines code based on execution results."
+            "Iteratively debugs and refines code based on execution results. "
+            "The agent has access to the following Python packages:\n"
+            "- beautifulsoup4>=4.13.4\n"
+            "- certifi>=2025.4.26\n"
+            "- datasets>=3.5.1\n"
+            "- dotenv>=0.9.9\n"
+            "- duckdb>=1.2.2\n"
+            "- ffmpeg-python>=0.2.0\n"
+            "- gradio[oauth]>=5.28.0\n"
+            "- helium>=5.1.1\n"
+            "- huggingface>=0.0.1\n"
+            "- imageio>=2.37.0\n"
+            "- matplotlib>=3.10.1\n"
+            "- numpy>=2.2.5\n"
+            "- openai-whisper>=20240930\n"
+            "- opencv-python>=4.11.0.86\n"
+            "- openpyxl>=3.1.5\n"
+            "- pandas>=2.2.3\n"
+            "- pyarrow>=20.0.0\n"
+            "- pygame>=2.6.1\n"
+            "- python-chess>=1.999\n"
+            "- requests>=2.32.3\n"
+            "- scikit-learn>=1.6.1\n"
+            "- scipy>=1.15.2\n"
+            "- seaborn>=0.13.2\n"
+            "- sqlalchemy>=2.0.40\n"
+            "- statsmodels>=0.14.4\n"
+            "- sympy>=1.14.0\n"
+            "- youtube-transcript-api>=1.0.3\n"
+            "- yt-dlp>=2025.3.31"
         ),
         # REMOVED: code_execute_fn - Execution is handled by the code_interpreter tool via the agent loop.
         tools=[
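
Note: `generate_python_code` now reads the standard `OPENAI_API_KEY` and pins the generator's decoding parameters. A minimal standalone sketch of the same configuration, assuming `llama-index-llms-openai` is installed and the key is exported (`reasoning_effort` is only meaningful for reasoning-style models such as o4-mini):

    import os
    from llama_index.llms.openai import OpenAI

    llm = OpenAI(
        model=os.getenv("CODE_GEN_LLM_MODEL", "o4-mini"),
        api_key=os.environ["OPENAI_API_KEY"],  # fails fast if unset, like the diff's ValueError
        reasoning_effort="high",
        temperature=0.25,
        max_tokens=16384,
    )
    print(llm.complete("Write a one-line Python expression that reverses a string.").text)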
agents/figure_interpretation_agent.py CHANGED
@@ -1,16 +1,11 @@
 import os
 import logging
-from typing import List, Dict, Optional, Union
-from dotenv import load_dotenv
 
 from llama_index.core.agent.workflow import ReActAgent
 from llama_index.core.schema import ImageDocument
 from llama_index.core.tools import FunctionTool
 from llama_index.llms.google_genai import GoogleGenAI
 
-# Load environment variables
-load_dotenv()
-
 # Setup logging
 logger = logging.getLogger(__name__)
agents/image_analyzer_agent.py CHANGED
@@ -1,13 +1,9 @@
 import os
 import logging
-from dotenv import load_dotenv
 
 from llama_index.core.agent.workflow import FunctionAgent
 from llama_index.llms.google_genai import GoogleGenAI
 
-# Load environment variables
-load_dotenv()
-
 # Setup logging
 logger = logging.getLogger(__name__)
 
@@ -69,7 +65,7 @@ def initialize_image_analyzer_agent() -> FunctionAgent:
         system_prompt=system_prompt,
         # No explicit tools needed if relying on direct multimodal LLM call
         # tools=[],
-        can_handoff_to=["planner_agent", "research_agent", "reasoning_agent"],
+        can_handoff_to=["planner_agent", "research_agent", "reasoning_agent", "figure_interpretation_agent"],
     )
     logger.info("ImageAnalyzerAgent initialized successfully.")
     return agent
agents/long_context_management_agent.py CHANGED
@@ -2,7 +2,6 @@ import os
 import logging
 import json
 from typing import List, Dict, Optional, Union, Literal
-from dotenv import load_dotenv
 
 from llama_index.core.agent.workflow import ReActAgent
 from llama_index.core.tools import FunctionTool, QueryEngineTool
@@ -12,8 +11,6 @@ from llama_index.core.node_parser import SentenceSplitter
 from llama_index.core.query_engine import RetrieverQueryEngine
 from llama_index.core.retrievers import VectorIndexRetriever
 
-# Load environment variables
-load_dotenv()
 
 # Setup logging
 logger = logging.getLogger(__name__)
@@ -348,8 +345,11 @@ def initialize_long_context_management_agent() -> ReActAgent:
     agent = ReActAgent(
         name="long_context_management_agent",
         description=(
-            "Manages and processes long textual context. Can load text (`load_text_context`), summarize (`summarize_long_context`), "
-            "extract key info (`extract_key_information`), filter by relevance (`filter_by_relevance`), and answer questions based on the context (`query_context_index`)."
+            "Manages and processes long textual context efficiently. Handles large documents, transcripts, or datasets "
+            "by summarizing (`summarize_long_context`), extracting key information (`extract_key_information`), "
+            "filtering relevant content (`filter_by_relevance`), and answering questions based on the context (`query_context_index`). "
+            "Supports internal indexing for efficient retrieval and repeated queries. Optimized for chunked input processing "
+            "and contextual distillation. Only relies on the provided input and avoids external augmentation unless explicitly requested."
        ),
        tools=tools,
        llm=llm,
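
Note: the rewritten description enumerates the agent's tool surface. A hypothetical call sequence for orientation; the tool names come from the description above, while the argument names here are illustrative, not the real signatures:

    # Hypothetical orientation sketch; argument names are illustrative.
    summary = summarize_long_context()                           # condensed overview
    key_info = extract_key_information(query="dates mentioned")  # targeted extraction
    relevant = filter_by_relevance(query="Q3 revenue")           # prune to what matters
    answer = query_context_index(query="What was Q3 revenue?")   # indexed retrieval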
agents/math_agent.py CHANGED
@@ -1,13 +1,13 @@
 import os
 import logging
-from typing import List, Optional, Union, Dict
-from dotenv import load_dotenv
+from typing import List, Dict
 
 import sympy as sp
 import numpy as np
 import scipy.linalg as la
 import scipy.special as special
-from scipy.integrate import odeint, quad
+from llama_index.tools.code_interpreter import CodeInterpreterToolSpec
+from scipy.integrate import quad
 from scipy.stats import binom, norm, poisson
 import numpy.fft as fft
 
@@ -16,9 +16,6 @@ from llama_index.core.tools import FunctionTool
 from llama_index.llms.google_genai import GoogleGenAI
 from llama_index.tools.wolfram_alpha import WolframAlphaToolSpec
 
-# Load environment variables
-load_dotenv()
-
 # Setup logging
 logger = logging.getLogger(__name__)
 
@@ -603,6 +600,26 @@ def get_wolfram_alpha_tools() -> List[FunctionTool]:
         _wolfram_alpha_tools = []
     return _wolfram_alpha_tools
 
+
+# Use LlamaIndex's built-in Code Interpreter Tool Spec for safe execution
+# This assumes the necessary environment (e.g., docker) for the spec is available
+try:
+    code_interpreter_spec = CodeInterpreterToolSpec()
+    # Get the tool(s) from the spec. It might return multiple tools.
+    code_interpreter_tools = code_interpreter_spec.to_tool_list()
+    if not code_interpreter_tools:
+        raise RuntimeError("CodeInterpreterToolSpec did not return any tools.")
+    # Assuming the primary tool is the first one, or find by name if necessary
+    code_interpreter_tool = next((t for t in code_interpreter_tools if t.metadata.name == "code_interpreter"), None)
+    if code_interpreter_tool is None:
+        raise RuntimeError("Could not find 'code_interpreter' tool in CodeInterpreterToolSpec results.")
+    logger.info("CodeInterpreterToolSpec initialized successfully.")
+except Exception as e:
+    logger.error(f"Failed to initialize CodeInterpreterToolSpec: {e}", exc_info=True)
+    # Fallback: Define a dummy tool or raise error to prevent agent start?
+    # For now, let initialization fail if the safe interpreter isn't available.
+    raise RuntimeError("CodeInterpreterToolSpec failed to initialize. Cannot create code_agent.") from e
+
 # --- Agent Initialization ---
 
 def initialize_math_agent() -> ReActAgent:
@@ -625,7 +642,7 @@ def initialize_math_agent() -> ReActAgent:
     logger.info(f"Using agent LLM: {agent_llm_model}")
 
     # Combine Python tools and Wolfram Alpha tools
-    all_tools = get_python_math_tools() + get_wolfram_alpha_tools()
+    all_tools = get_python_math_tools() + get_wolfram_alpha_tools() + [code_interpreter_tool]
     if not all_tools:
         logger.warning("No math tools available (Python or WolframAlpha). MathAgent may be ineffective.")
 
@@ -661,7 +678,7 @@ def initialize_math_agent() -> ReActAgent:
         tools=all_tools,
         llm=llm,
         system_prompt=system_prompt,
-        can_handoff_to=["planner_agent"],
+        can_handoff_to=["planner_agent", "reasoning_agent"],
     )
     logger.info("MathAgent initialized successfully.")
     return agent
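
Note: the `code_interpreter` tool resolved above is a standard `FunctionTool`, so it can be exercised directly; a minimal sketch (the `code=` keyword matches `CodeInterpreterToolSpec.code_interpreter`; the output shape may vary by version):

    # Sketch: invoke the interpreter tool outside the agent loop as a sanity check.
    result = code_interpreter_tool.call(code="print(2**10)")
    print(result)  # ToolOutput whose content should include the captured stdout: 1024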
agents/planner_agent.py CHANGED
@@ -1,14 +1,11 @@
 import os
 import logging
 from typing import List, Dict
-from dotenv import load_dotenv
 
 from llama_index.core.agent.workflow import ReActAgent
 from llama_index.core.tools import FunctionTool
 from llama_index.llms.google_genai import GoogleGenAI
 
-# Load environment variables
-load_dotenv()
 
 # Setup logging
 logger = logging.getLogger(__name__)
@@ -48,7 +45,7 @@ def plan(objective: str) -> List[str]:
     gemini_api_key = os.getenv("GEMINI_API_KEY")
     if not gemini_api_key:
         logger.error("GEMINI_API_KEY not found for planning tool LLM.")
-        return ["Error: GEMINI_API_KEY not set for planning."]
+        return "Error: GEMINI_API_KEY not set for planning."
 
     # Prompt for the LLM to generate sub-steps
     input_prompt = (
@@ -84,22 +81,23 @@ def plan(objective: str) -> List[str]:
 
         if not sub_steps:
             logger.warning("LLM generated no sub-steps for the objective.")
-            return ["Error: Failed to generate sub-steps."]
+            return "Error: Failed to generate sub-steps."
 
         logger.info(f"Generated {len(sub_steps)} sub-steps.")
+
         return sub_steps
 
     except Exception as e:
         logger.error(f"LLM call failed during planning: {e}", exc_info=True)
-        return [f"Error during planning: {e}"]
+        return f"Error during planning: {e}"
 
-def synthesize_and_respond(results: List[Dict[str, str]]) -> str:
+def synthesize_and_report(results: List[Dict[str, str]]) -> str:
     """
     Aggregate results from sub-steps into a coherent final report using an LLM.
     Args:
         results (List[Dict[str, str]]): List of dictionaries, each with "sub_step" and "answer" keys.
     Returns:
-        str: A unified, well-structured response, or an error message.
+        str: A unified, well-structured report, or an error message.
     """
     logger.info(f"Synthesizing results from {len(results)} sub-steps...")
     if not results:
@@ -121,7 +119,9 @@ def synthesize_and_report(results: List[Dict[str, str]]) -> str:
         return "Error: GEMINI_API_KEY not set for synthesis."
 
     # Prompt for the LLM
-    input_prompt = f"""You are an expert synthesizer. Given the following sub-steps and their answers derived from an initial objective, produce a single, coherent, comprehensive final report that addresses the original objective:
+    input_prompt = f"""You are an expert synthesizer. Given the following sub-steps and their answers derived
+    from an initial objective, produce a single, coherent, comprehensive final report that
+    addresses the original objective:
 
 --- SUB-STEP RESULTS ---
 {summary_blocks.strip()}
@@ -140,10 +140,59 @@ def synthesize_and_report(results: List[Dict[str, str]]) -> str:
         logger.error(f"LLM call failed during synthesis: {e}", exc_info=True)
         return f"Error during synthesis: {e}"
 
+def answer_question(question: str) -> str:
+    """
+    Answer any question by following this strict format:
+    1. Include your chain of thought (your reasoning steps).
+    2. End your reply with the exact template:
+       FINAL ANSWER: [YOUR FINAL ANSWER]
+    YOUR FINAL ANSWER must be:
+    - A number, or
+    - As few words as possible, or
+    - A comma-separated list of numbers and/or strings.
+    Formatting rules:
+    * If asked for a number, do not use commas or units (e.g., $, %), unless explicitly requested.
+    * If asked for a string, do not include articles or abbreviations (e.g., city names), and write digits in plain text.
+    * If asked for a comma-separated list, apply the above rules to each element.
+    This tool should be invoked immediately after completing the final planning sub-step.
+    """
+    logger.info(f"Answering question: {question[:100]}")
+
+    gemini_api_key = os.getenv("GEMINI_API_KEY")
+    if not gemini_api_key:
+        logger.error("GEMINI_API_KEY not set for answer_question tool.")
+        return "Error: GEMINI_API_KEY not set."
+
+    model_name = os.getenv("ANSWER_TOOL_LLM_MODEL", "models/gemini-1.5-pro")
+
+    # Build the assistant prompt enforcing the required format
+    assistant_prompt = (
+        "You are a general AI assistant. I will ask you a question. "
+        "Report your thoughts, and finish your answer with the following template: "
+        "FINAL ANSWER: [YOUR FINAL ANSWER]. "
+        "YOUR FINAL ANSWER should be a number OR as few words as possible "
+        "OR a comma separated list of numbers and/or strings. "
+        "If you are asked for a number, don't use commas for thousands or any units like $ or % unless specified. "
+        "If you are asked for a string, omit articles and abbreviations, and write digits in plain text. "
+        "If you are asked for a comma separated list, apply these rules to each element.\n\n"
+        f"Question: {question}\n"
+        "Answer:"
+    )
+
+    try:
+        llm = GoogleGenAI(api_key=gemini_api_key, model=model_name)
+        logger.info(f"Using answer LLM: {model_name}")
+        response = llm.complete(assistant_prompt)
+        logger.info("Answer generated successfully.")
+        return response.text
+    except Exception as e:
+        logger.error(f"LLM call failed during answer generation: {e}", exc_info=True)
+        return f"Error during answer generation: {e}"
+
 # --- Tool Definitions ---
 synthesize_tool = FunctionTool.from_defaults(
-    fn=synthesize_and_respond,
-    name="synthesize_and_respond",
+    fn=synthesize_and_report,
+    name="synthesize_and_report",
     description=(
         "Aggregates results from multiple sub-steps into a final coherent report. "
         "Input: results (List[Dict[str, str]]) where each dict has \"sub_step\" and \"answer\". "
@@ -160,6 +209,15 @@ generate_substeps_tool = FunctionTool.from_defaults(
     )
 )
 
+answer_question = FunctionTool.from_defaults(
+    fn=answer_question,
+    name="answer_question",
+    description=(
+        "Answers any question and returns the full text, "
+        "always ending with 'FINAL ANSWER: ...' in accordance with the rules."
+    ),
+)
+
 # --- Agent Initialization ---
 def initialize_planner_agent() -> ReActAgent:
     """Initializes the Planner Agent."""
@@ -185,7 +243,7 @@ def initialize_planner_agent() -> ReActAgent:
         logger.warning("Using default/fallback system prompt for PlannerAgent.")
 
     # Define available tools
-    tools = [generate_substeps_tool, synthesize_tool]
+    tools = [generate_substeps_tool, synthesize_tool, answer_question]
 
     # Define valid handoff targets
     valid_handoffs = [
@@ -196,7 +254,11 @@ def initialize_planner_agent() -> ReActAgent:
         "image_analyzer_agent",
         "text_analyzer_agent",
         "verifier_agent",
-        "reasoning_agent"
+        "reasoning_agent",
+        "figure_interpretation_agent",
+        "long_context_management_agent",
+        "advanced_validation_agent",
+        "video_analyzer_agent"
     ]
 
     agent = ReActAgent(
@@ -204,7 +266,7 @@ def initialize_planner_agent() -> ReActAgent:
         description=(
             "Strategically plans tasks by breaking down objectives into sub-steps using `generate_substeps`. "
             "Orchestrates execution by handing off sub-steps to specialized agents. "
-            "Synthesizes final results using `synthesize_and_respond`."
+            "Synthesizes final results using `synthesize_and_report`."
        ),
        tools=tools,
        llm=llm,
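
Note: with `answer_question` added to the planner, reasoning, and research agents alike, every final reply is expected to end with the same `FINAL ANSWER:` template (the format scored by the GAIA benchmark API configured in .env), so a caller can recover the short-form answer with a simple pattern. A minimal sketch; the helper name is ours, not part of the commit:

    import re

    def extract_final_answer(text: str) -> str:
        """Return what follows the last 'FINAL ANSWER:' marker, or the full text."""
        matches = re.findall(r"FINAL ANSWER:\s*(.+)", text)
        return matches[-1].strip() if matches else text.strip()

    print(extract_final_answer("Reasoning...\nFINAL ANSWER: 42"))  # -> 42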
agents/reasoning_agent.py CHANGED
@@ -1,15 +1,11 @@
 import os
 import logging
-from dotenv import load_dotenv
 
 from llama_index.core.agent.workflow import ReActAgent
 from llama_index.core.tools import FunctionTool
 from llama_index.llms.google_genai import GoogleGenAI
 from llama_index.llms.openai import OpenAI
 
-# Load environment variables
-load_dotenv()
-
 # Setup logging
 logger = logging.getLogger(__name__)
 
@@ -45,7 +41,7 @@ def reasoning_tool_fn(context: str) -> str:
 
     # Configuration for the reasoning LLM (OpenAI in the original)
     reasoning_llm_model = os.getenv("REASONING_LLM_MODEL", "gpt-4o-mini") # Use gpt-4o-mini as default
-    openai_api_key = os.getenv("ALPAFLOW_OPENAI_API_KEY") # Specific key from original code
+    openai_api_key = os.getenv("OPENAI_API_KEY")
 
     if not openai_api_key:
         logger.error("ALPAFLOW_OPENAI_API_KEY not found for reasoning tool LLM.")
@@ -75,7 +71,9 @@ def reasoning_tool_fn(context: str) -> str:
         llm = OpenAI(
             model=reasoning_llm_model,
             api_key=openai_api_key,
-            # reasoning_effort="high" # Add if needed and supported by the specific OpenAI integration
+            reasoning_effort="high",
+            temperature=0.25,
+            max_tokens=16384
         )
         logger.info(f"Using reasoning LLM: {reasoning_llm_model}")
         response = llm.complete(reasoning_prompt)
@@ -85,6 +83,57 @@ def reasoning_tool_fn(context: str) -> str:
         logger.error(f"Error during reasoning tool LLM call: {e}", exc_info=True)
         return f"Error during reasoning: {e}"
 
+
+def answer_question(question: str) -> str:
+    """
+    Answer any question by following this strict format:
+    1. Include your chain of thought (your reasoning steps).
+    2. End your reply with the exact template:
+       FINAL ANSWER: [YOUR FINAL ANSWER]
+    YOUR FINAL ANSWER must be:
+    - A number, or
+    - As few words as possible, or
+    - A comma-separated list of numbers and/or strings.
+    Formatting rules:
+    * If asked for a number, do not use commas or units (e.g., $, %), unless explicitly requested.
+    * If asked for a string, do not include articles or abbreviations (e.g., city names), and write digits in plain text.
+    * If asked for a comma-separated list, apply the above rules to each element.
+    This tool should be invoked immediately after completing the final planning sub-step.
+    """
+    logger.info(f"Answering question: {question[:100]}")
+
+    gemini_api_key = os.getenv("GEMINI_API_KEY")
+    if not gemini_api_key:
+        logger.error("GEMINI_API_KEY not set for answer_question tool.")
+        return "Error: GEMINI_API_KEY not set."
+
+    model_name = os.getenv("ANSWER_TOOL_LLM_MODEL", "models/gemini-1.5-pro")
+
+    # Build the assistant prompt enforcing the required format
+    assistant_prompt = (
+        "You are a general AI assistant. I will ask you a question. "
+        "Report your thoughts, and finish your answer with the following template: "
+        "FINAL ANSWER: [YOUR FINAL ANSWER]. "
+        "YOUR FINAL ANSWER should be a number OR as few words as possible "
+        "OR a comma separated list of numbers and/or strings. "
+        "If you are asked for a number, don't use commas for thousands or any units like $ or % unless specified. "
+        "If you are asked for a string, omit articles and abbreviations, and write digits in plain text. "
+        "If you are asked for a comma separated list, apply these rules to each element.\n\n"
+        f"Question: {question}\n"
+        "Answer:"
+    )
+
+    try:
+        llm = GoogleGenAI(api_key=gemini_api_key, model=model_name)
+        logger.info(f"Using answer LLM: {model_name}")
+        response = llm.complete(assistant_prompt)
+        logger.info("Answer generated successfully.")
+        return response.text
+    except Exception as e:
+        logger.error(f"LLM call failed during answer generation: {e}", exc_info=True)
+        return f"Error during answer generation: {e}"
+
+
 # --- Tool Definition ---
 reasoning_tool = FunctionTool.from_defaults(
     fn=reasoning_tool_fn,
@@ -95,6 +144,15 @@ reasoning_tool = FunctionTool.from_defaults(
     ),
 )
 
+answer_question = FunctionTool.from_defaults(
+    fn=answer_question,
+    name="answer_question",
+    description=(
+        "Use this tool to answer any question, reporting your reasoning steps and ending with 'FINAL ANSWER: ...'. "
+        "Invoke this tool immediately after the final sub-step of planning is complete."
+    ),
+)
+
 # --- Agent Initialization ---
 def initialize_reasoning_agent() -> ReActAgent:
     """Initializes the Reasoning Agent."""
@@ -122,15 +180,17 @@ def initialize_reasoning_agent() -> ReActAgent:
         agent = ReActAgent(
             name="reasoning_agent",
             description=(
-                "A pure reasoning agent that uses the `reasoning_tool` for detailed chain-of-thought analysis "
-                "on the provided context, then hands off the result to the `planner_agent`."
+                "An autonomous reasoning specialist that applies `reasoning_tool` to perform "
+                "in-depth chain-of-thought analysis on incoming queries or contexts, "
+                "then seamlessly delegates the synthesized insights to `planner_agent` "
+                "or `long_context_management_agent` for subsequent task orchestration."
             ),
-            tools=[reasoning_tool], # Only has access to the reasoning tool
+            tools=[reasoning_tool, answer_question],
             llm=llm,
             system_prompt=system_prompt,
-            can_handoff_to=["planner_agent"],
+            can_handoff_to=["planner_agent", "long_context_management_agent", "advanced_validation_agent", "code_agent"],
         )
-        logger.info("ReasoningAgent initialized successfully.")
+
        return agent
 
    except Exception as e:
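
Note on a subtlety repeated in the planner, reasoning, and research diffs: `answer_question = FunctionTool.from_defaults(fn=answer_question, ...)` rebinds the module-level name. This still works because the right-hand side evaluates first, capturing the plain function in `fn=` before the name comes to refer to the `FunctionTool`. A minimal sketch of the same pattern:

    from llama_index.core.tools import FunctionTool

    def answer(question: str) -> str:
        return f"FINAL ANSWER: {question}"

    # RHS evaluates before rebinding, so the tool wraps the real function even
    # though `answer` now names the FunctionTool instead of the function.
    answer = FunctionTool.from_defaults(fn=answer, name="answer")
    print(answer.call(question="42"))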
agents/research_agent.py CHANGED
@@ -3,7 +3,6 @@ import time
3
  import logging
4
  import re # Import regex for video ID extraction
5
  from typing import List, Optional, Dict # Added Dict
6
- from dotenv import load_dotenv
7
 
8
  from llama_index.core.agent.workflow import ReActAgent
9
  from llama_index.core.tools import FunctionTool
@@ -27,89 +26,10 @@ except ImportError:
27
  logging.warning("Selenium or Helium not installed. Browser interaction tools will be unavailable.")
28
  SELENIUM_AVAILABLE = False
29
 
30
- # Attempt to import YouTube transcript API
31
- try:
32
- from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
33
- YOUTUBE_TRANSCRIPT_API_AVAILABLE = True
34
- except ImportError:
35
- logging.warning("youtube-transcript-api not installed. YouTube transcript tool will be unavailable.")
36
- YOUTUBE_TRANSCRIPT_API_AVAILABLE = False
37
-
38
- # Load environment variables
39
- load_dotenv()
40
 
41
  # Setup logging
42
  logger = logging.getLogger(__name__)
43
 
44
- # --- Helper function to extract YouTube Video ID ---
45
- def extract_video_id(url: str) -> Optional[str]:
46
- """Extracts the YouTube video ID from various URL formats."""
47
- # Standard watch URL: https://www.youtube.com/watch?v=VIDEO_ID
48
- match = re.search(r'(?:v=|/v/|embed/|youtu\.be/|/shorts/)([A-Za-z0-9_-]+)', url)
49
- if match:
50
- return match.group(1)
51
- return None
52
-
53
- # --- YouTube Transcript Tool ---
54
- def get_youtube_transcript(video_url_or_id: str, languages=None) -> str:
55
- """Fetches the transcript for a YouTube video using its URL or video ID.
56
- Specify preferred languages as a list (e.g., ["en", "es"]).
57
- Returns the transcript text or an error message.
58
- """
59
- if languages is None:
60
- languages = ["en"]
61
- if not YOUTUBE_TRANSCRIPT_API_AVAILABLE:
62
- return "Error: youtube-transcript-api library is required but not installed."
63
-
64
- logger.info(f"Attempting to fetch YouTube transcript for: {video_url_or_id}")
65
- video_id = extract_video_id(video_url_or_id)
66
- if not video_id:
67
- # Assume it might be an ID already if extraction fails
68
- if re.match(r"^[a-zA-Z0-9_\-]+$", video_url_or_id):
69
- video_id = video_url_or_id
70
- logger.info("Input treated as video ID.")
71
- else:
72
- logger.error(f"Could not extract valid YouTube video ID from: {video_url_or_id}")
73
- return f"Error: Invalid YouTube URL or Video ID format: {video_url_or_id}"
74
-
75
- try:
76
- # Fetch available transcripts
77
- api = YouTubeTranscriptApi()
78
- transcript_list = api.list(video_id)
79
-
80
- # Try to find a transcript in the specified languages
81
- transcript = transcript_list.find_transcript(languages)
82
-
83
- # Fetch the actual transcript data (list of dicts)
84
- transcript_data = transcript.fetch()
85
-
86
- # Combine the text parts into a single string
87
- full_transcript = " ".join(snippet.text for snippet in transcript_data)
88
-
89
- full_transcript = " ".join(snippet.text for snippet in transcript_data)
90
- logger.info(f"Successfully fetched transcript for video ID {video_id} in language {transcript.language}.")
91
- return full_transcript
92
-
93
- except TranscriptsDisabled:
94
- logger.warning(f"Transcripts are disabled for video ID: {video_id}")
95
- return f"Error: Transcripts are disabled for this video (ID: {video_id})."
96
- except NoTranscriptFound as e:
97
- logger.warning(f"No transcript found for video ID {video_id} in languages {languages}. Available: {e.available_transcripts}")
98
- # Try fetching any available transcript if specific languages failed
99
- try:
100
- logger.info(f"Attempting to fetch any available transcript for {video_id}")
101
- any_transcript = transcript_list.find_generated_transcript(transcript_list.manually_created_transcripts.keys() or transcript_list.generated_transcripts.keys())
102
- any_transcript_data = any_transcript.fetch()
103
- full_transcript = " ".join([item["text"] for item in any_transcript_data])
104
- logger.info(f"Successfully fetched fallback transcript for video ID {video_id} in language {any_transcript.language}.")
105
- return full_transcript
106
- except Exception as fallback_e:
107
- logger.error(f"Could not find any transcript for video ID {video_id}. Original error: {e}. Fallback error: {fallback_e}")
108
- return f"Error: No transcript found for video ID {video_id} in languages {languages} or any fallback language."
109
- except Exception as e:
110
- logger.error(f"Unexpected error fetching transcript for video ID {video_id}: {e}", exc_info=True)
111
- return f"Error fetching transcript: {e}"
112
-
113
  # --- Browser Interaction Tools (Conditional on Selenium/Helium availability) ---
114
 
115
  # Global browser instance (managed by initializer)
@@ -286,7 +206,55 @@ def close_popups() -> str:
286
  time.sleep(0.5)
287
  return "Sent ESC key press."
288
 
289
- # --- Search Engine & Data Source Tools ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
 
291
  # --- Agent Initializer Class ---
292
  class ResearchAgentInitializer:
@@ -296,7 +264,6 @@ class ResearchAgentInitializer:
296
  self.browser_tools = []
297
  self.search_tools = []
298
  self.datasource_tools = []
299
- self.youtube_tool = None # Added for YouTube tool
300
 
301
  # Initialize LLM
302
  self._initialize_llm()
@@ -311,7 +278,15 @@ class ResearchAgentInitializer:
311
  # Initialize Search/Datasource Tools
312
  self._create_search_tools()
313
  self._create_datasource_tools()
314
- self._create_youtube_tool() # Added
 
 
 
 
 
 
 
 
315
 
316
  logger.info("ResearchAgent resources initialized.")
317
 
@@ -366,7 +341,7 @@ class ResearchAgentInitializer:
366
  self.browser_tools = [
367
  FunctionTool.from_defaults(fn=visit, name="visit_url"), # Renamed for clarity
368
  FunctionTool.from_defaults(fn=get_text_by_css, name="get_text_by_css"),
369
- FunctionTool.from_defaults(fn=get_page_html, name="get_page_html"),
370
  FunctionTool.from_defaults(fn=click_element_by_css, name="click_element_by_css"),
371
  FunctionTool.from_defaults(fn=input_text_by_css, name="input_text_by_css"),
372
  FunctionTool.from_defaults(fn=scroll_page, name="scroll_page"),
@@ -444,28 +419,14 @@ class ResearchAgentInitializer:
444
 
445
  logger.info(f"Created {len(self.datasource_tools)} specific data source tools.")
446
 
447
- def _create_youtube_tool(self): # Added method
448
- if YOUTUBE_TRANSCRIPT_API_AVAILABLE:
449
- self.youtube_tool = FunctionTool.from_defaults(
450
- fn=get_youtube_transcript,
451
- name="get_youtube_transcript",
452
- description=(
453
- "(YouTube) Fetches the transcript text for a given YouTube video URL or video ID. "
454
- "Specify preferred languages (e.g., [\"en\", \"es\"]). Returns transcript or error."
455
- )
456
- )
457
- logger.info("Created YouTube transcript tool.")
458
- else:
459
- self.youtube_tool = None
460
- logger.warning("YouTube transcript tool disabled because youtube-transcript-api is not installed.")
461
 
462
  def get_agent(self) -> ReActAgent:
463
  """Creates and returns the configured ReActAgent for research."""
464
  logger.info("Creating ResearchAgent ReActAgent instance...")
465
 
466
  all_tools = self.browser_tools + self.search_tools + self.datasource_tools
467
- if self.youtube_tool: # Add YouTube tool if available
468
- all_tools.append(self.youtube_tool)
469
 
470
  if not all_tools:
471
  logger.warning("No tools available for ResearchAgent. It will likely be unable to function.")
@@ -474,29 +435,43 @@ class ResearchAgentInitializer:
474
  # Updated prompt to include YouTube tool
475
  system_prompt = """\
476
  You are ResearchAgent, an autonomous web research assistant. Your goal is to gather information accurately and efficiently using the available tools.
477
-
478
  Available Tool Categories:
479
  - (Browser): Tools for direct web page interaction (visiting URLs, clicking, scrolling, extracting text/HTML, inputting text).
480
  - (Search): Tools for querying search engines (Google, DuckDuckGo, Tavily).
481
  - (Wikipedia): Tools for searching and loading Wikipedia pages.
482
  - (YahooFinance): Tools for retrieving financial data (balance sheets, income statements, stock info, news).
483
  - (ArXiv): Tool for searching academic papers on ArXiv.
484
- - (YouTube): Tool for fetching video transcripts (`get_youtube_transcript`).
485
-
486
- Workflow:
487
- 1. **Thought**: Analyze the research goal. Break it down if necessary. Choose the *single best tool* for the *next immediate step*. Explain your choice. Consider the information needed and which tool provides it most directly (e.g., use YahooFinance for stock prices, Google/DDG for general web search, Tavily for document search, ArXiv for papers, Wikipedia for encyclopedic info, YouTube for video transcripts, Browser tools for specific website interaction).
488
- 2. **Action**: Call the chosen tool with the correct arguments. Ensure inputs match the tool's requirements (e.g., URL or video ID for YouTube).
 
 
 
 
 
 
 
 
 
 
 
 
 
489
  3. **Observation**: Examine the tool's output. Extract the relevant information. Check for errors.
490
- 4. **Reflect & Iterate**: Does the observation satisfy the immediate goal? Do you have enough information for the overall research task? If not, return to step 1 (Thought) to plan the *next* single step. If a tool failed, consider why and try an alternative tool or approach.
491
- 5. **Synthesize**: Once all necessary information is gathered, synthesize the findings into a coherent answer to the original research goal.
492
- 6. **Hand-Off**: Pass the synthesized findings to the appropriate next agent: **code_agent** (for coding), **math_agent** (for math), **text_analyzer_agent** (for text analysis), **planner_agent** (for planning/synthesis), or **reasoning_agent** (for logic/reasoning).
493
-
494
- Constraints:
 
495
  - Use only one tool per Action step.
496
  - Think step-by-step.
497
  - If using browser tools, start with `visit_url`.
498
- - Be mindful of potential errors and try alternative tools if one fails.
499
- - Synthesize results *before* handing off.
500
  """
501
 
502
  agent = ReActAgent(
@@ -512,6 +487,8 @@ class ResearchAgentInitializer:
512
  "code_agent",
513
  "math_agent",
514
  "text_analyzer_agent", # Added based on original prompt
 
 
515
  "planner_agent",
516
  "reasoning_agent"
517
  ],
@@ -576,47 +553,5 @@ if __name__ == "__main__":
576
  missing_optional = [key for key in optional_keys if not os.getenv(key)]
577
  if missing_optional:
578
  print(f"Warning: Optional environment variable(s) not set: {', '.join(missing_optional)}. Some tools may be unavailable.")
579
-
580
- test_agent = None
581
- try:
582
- # Test YouTube transcript tool directly
583
- if YOUTUBE_TRANSCRIPT_API_AVAILABLE:
584
- print("\nTesting YouTube transcript tool...")
585
- # Example video: "Attention is All You Need" paper explanation
586
- yt_url = "https://www.youtube.com/watch?v=TQQlZhbC5ps"
587
- transcript = get_youtube_transcript(yt_url)
588
- if not transcript.startswith("Error:"):
589
- print(f"Transcript fetched (first 500 chars):\n{transcript[:500]}...")
590
- else:
591
- print(f"YouTube Transcript Fetch Failed: {transcript}")
592
- else:
593
- print("\nSkipping YouTube transcript test as youtube-transcript-api is not available.")
594
-
595
- # Initialize agent AFTER testing standalone functions
596
- test_agent = initialize_research_agent()
597
- print("\nResearch Agent initialized successfully for testing.")
598
-
599
- # Example test (requires browser tools to be available)
600
- # if SELENIUM_AVAILABLE:
601
- # print("\nTesting browser visit...")
602
- # result = test_agent.chat("Visit https://example.com and tell me the main heading text using CSS selector 'h1'")
603
- # print(f"Test query result: {result}")
604
- # else:
605
- # print("\nSkipping browser test as Selenium/Helium are not available.")
606
-
607
- # Example search test (requires GOOGLE keys)
608
- # if os.getenv("GOOGLE_API_KEY") and os.getenv("GOOGLE_CSE_ID"):
609
- # print("\nTesting Google Search...")
610
- # result_search = test_agent.chat("Search for 'LlamaIndex Agent Workflow'")
611
- # print(f"Search test result: {result_search}")
612
- # else:
613
- # print("\nSkipping Google Search test as API keys are not set.")
614
-
615
- except Exception as e:
616
- print(f"Error during testing: {e}")
617
- finally:
618
- # Clean up browser if it was started
619
- if test_agent:
620
- print("\nCleaning up resources...")
621
- cleanup_research_agent_resources()
622
 
 
3
  import logging
4
  import re # Import regex for video ID extraction
5
  from typing import List, Optional, Dict # Added Dict
 
6
 
7
  from llama_index.core.agent.workflow import ReActAgent
8
  from llama_index.core.tools import FunctionTool
 
26
  logging.warning("Selenium or Helium not installed. Browser interaction tools will be unavailable.")
27
  SELENIUM_AVAILABLE = False
28
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  # Setup logging
31
  logger = logging.getLogger(__name__)
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  # --- Browser Interaction Tools (Conditional on Selenium/Helium availability) ---
34
 
35
  # Global browser instance (managed by initializer)
 
206
  time.sleep(0.5)
207
  return "Sent ESC key press."
208
 
209
+ def answer_question(question: str) -> str:
210
+ """
211
+ Answer any question by following this strict format:
212
+ 1. Include your chain of thought (your reasoning steps).
213
+ 2. End your reply with the exact template:
214
+ FINAL ANSWER: [YOUR FINAL ANSWER]
215
+ YOUR FINAL ANSWER must be:
216
+ - A number, or
217
+ - As few words as possible, or
218
+ - A comma-separated list of numbers and/or strings.
219
+ Formatting rules:
220
+ * If asked for a number, do not use commas or units (e.g., $, %), unless explicitly requested.
221
+ * If asked for a string, do not include articles or abbreviations (e.g., city names), and write digits in plain text.
222
+ * If asked for a comma-separated list, apply the above rules to each element.
223
+ This tool should be invoked immediately after completing the final planning sub-step.
224
+ """
225
+ logger.info(f"Answering question: {question[:100]}")
226
+
227
+ gemini_api_key = os.getenv("GEMINI_API_KEY")
228
+ if not gemini_api_key:
229
+ logger.error("GEMINI_API_KEY not set for answer_question tool.")
230
+ return "Error: GEMINI_API_KEY not set."
231
+
232
+ model_name = os.getenv("ANSWER_TOOL_LLM_MODEL", "models/gemini-1.5-pro")
233
+
234
+ # Build the assistant prompt enforcing the required format
235
+ assistant_prompt = (
236
+ "You are a general AI assistant. I will ask you a question. "
237
+ "Report your thoughts, and finish your answer with the following template: "
238
+ "FINAL ANSWER: [YOUR FINAL ANSWER]. "
239
+ "YOUR FINAL ANSWER should be a number OR as few words as possible "
240
+ "OR a comma separated list of numbers and/or strings. "
241
+ "If you are asked for a number, don't use commas for thousands or any units like $ or % unless specified. "
242
+ "If you are asked for a string, omit articles and abbreviations, and write digits in plain text. "
243
+ "If you are asked for a comma separated list, apply these rules to each element.\n\n"
244
+ f"Question: {question}\n"
245
+ "Answer:"
246
+ )
247
+
248
+ try:
249
+ llm = GoogleGenAI(api_key=gemini_api_key, model=model_name)
250
+ logger.info(f"Using answer LLM: {model_name}")
251
+ response = llm.complete(assistant_prompt)
252
+ logger.info("Answer generated successfully.")
253
+ return response.text
254
+ except Exception as e:
255
+ logger.error(f"LLM call failed during answer generation: {e}", exc_info=True)
256
+ return f"Error during answer generation: {e}"
257
+
258
 
259
  # --- Agent Initializer Class ---
260
  class ResearchAgentInitializer:
 
264
  self.browser_tools = []
265
  self.search_tools = []
266
  self.datasource_tools = []
 
267
 
268
  # Initialize LLM
269
  self._initialize_llm()
 
278
  # Initialize Search/Datasource Tools
279
  self._create_search_tools()
280
  self._create_datasource_tools()
281
+
282
+ self.answer_question = FunctionTool.from_defaults(
283
+ fn=answer_question,
284
+ name="answer_question",
285
+ description=(
286
+ "Use this tool to answer any question, reporting your reasoning steps and ending with 'FINAL ANSWER: ...'. "
287
+ "Invoke this tool immediately after the final sub-step of planning is complete."
288
+ ),
289
+ )
290
 
291
  logger.info("ResearchAgent resources initialized.")
292
 
 
341
  self.browser_tools = [
342
  FunctionTool.from_defaults(fn=visit, name="visit_url"), # Renamed for clarity
343
  FunctionTool.from_defaults(fn=get_text_by_css, name="get_text_by_css"),
344
+ # FunctionTool.from_defaults(fn=get_page_html, name="get_page_html"),
345
  FunctionTool.from_defaults(fn=click_element_by_css, name="click_element_by_css"),
346
  FunctionTool.from_defaults(fn=input_text_by_css, name="input_text_by_css"),
347
  FunctionTool.from_defaults(fn=scroll_page, name="scroll_page"),
 
419
 
420
  logger.info(f"Created {len(self.datasource_tools)} specific data source tools.")
421
 
422
+
 
 
423
 
424
  def get_agent(self) -> ReActAgent:
425
  """Creates and returns the configured ReActAgent for research."""
426
  logger.info("Creating ResearchAgent ReActAgent instance...")
427
 
428
  all_tools = self.browser_tools + self.search_tools + self.datasource_tools
429
+ all_tools.append(self.answer_question)
 
430
 
431
  if not all_tools:
432
  logger.warning("No tools available for ResearchAgent. It will likely be unable to function.")
 
435
  # Updated prompt to include YouTube tool
436
  system_prompt = """\
437
  You are ResearchAgent, an autonomous web research assistant. Your goal is to gather information accurately and efficiently using the available tools.
438
+
439
  Available Tool Categories:
440
  - (Browser): Tools for direct web page interaction (visiting URLs, clicking, scrolling, extracting text/HTML, inputting text).
441
  - (Search): Tools for querying search engines (Google, DuckDuckGo, Tavily).
442
  - (Wikipedia): Tools for searching and loading Wikipedia pages.
443
  - (YahooFinance): Tools for retrieving financial data (balance sheets, income statements, stock info, news).
444
  - (ArXiv): Tool for searching academic papers on ArXiv.
445
+ - (Answer): `answer_question`: use this when your research has yielded a definitive result and you need to reply in the strict “FINAL ANSWER” format.
446
+
447
+ **Answer Tool Usage**
448
+ When you know the final answer and no further data is required, invoke `answer_question` with the user’s query. It will return text ending with:
449
+
450
+ FINAL ANSWER: [YOUR FINAL ANSWER]
451
+
452
+ Formatting rules for **YOUR FINAL ANSWER**:
453
+ - A single number, or
454
+ - As few words as possible, or
455
+ - A comma-separated list of numbers and/or strings.
456
+ - If numeric: no thousands separators or units (%, $, etc.) unless explicitly requested.
457
+ - If string: omit articles and abbreviations; write digits in plain text.
458
+ - If a list: apply the above rules to each element.
459
+
460
+ **Workflow:**
461
+ 1. **Thought**: Analyze the research goal. Break it down if necessary. Choose the *single best tool* for the *next immediate step*. Explain your choice.
462
+ 2. **Action**: Call the chosen tool with the correct arguments. Ensure inputs match the tool's requirements.
463
  3. **Observation**: Examine the tool's output. Extract the relevant information. Check for errors.
464
+ 4. **Reflect & Iterate**: Does the observation satisfy the immediate goal? If not, return to step 1. If a tool failed, try an alternative approach.
465
+ 5. **Advanced Validation**: Before delivering any final response, invoke `advanced_validation_agent` with the combined insights from the reasoning and planning phases. If validation fails, pass the feedback back into **planner_agent** to refine the approach and repeat validation.
466
+ 6. **Synthesize**: Once validation is approved, synthesize all gathered information into a coherent answer.
467
+ 7. **Respond**: Invoke `answer_question` to emit the **FINAL ANSWER** according to the strict template rules.
468
+
469
+ **Constraints:**
470
  - Use only one tool per Action step.
471
  - Think step-by-step.
472
  - If using browser tools, start with `visit_url`.
473
+ - Synthesize results *before* handing off or responding.
474
+ - Do not skip any workflow step (reason → action → observation → reflect → validate → synthesize → respond).
475
  """
476
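For illustration, a hypothetical single pass through this loop (tool names and values are placeholders and may not match the registered tools exactly):

    Thought: The question asks for the year arXiv was founded; a search engine query is the best next step.
    Action: search(query="arxiv founding year")
    Observation: Multiple results state that arXiv started in 1991.
    Thought: The fact is confirmed; validate, synthesize, then respond.
    Action: answer_question(question="In what year was arXiv founded?")
    Observation: "... FINAL ANSWER: 1991"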
 
477
  agent = ReActAgent(
 
487
  "code_agent",
488
  "math_agent",
489
  "text_analyzer_agent", # Added based on original prompt
490
+ "advanced_validation_agent",
491
+ "long_context_management_agent"
492
  "planner_agent",
493
  "reasoning_agent"
494
  ],
 
553
  missing_optional = [key for key in optional_keys if not os.getenv(key)]
554
  if missing_optional:
555
  print(f"Warning: Optional environment variable(s) not set: {', '.join(missing_optional)}. Some tools may be unavailable.")
556
+
 
 
557
 
agents/role_agent.py CHANGED
@@ -1,6 +1,5 @@
1
  import os
2
  import logging
3
- from dotenv import load_dotenv
4
 
5
  import datasets
6
  from llama_index.core import Document, VectorStoreIndex
@@ -14,8 +13,6 @@ from llama_index.core.postprocessor import SentenceTransformerRerank
14
  from llama_index.llms.google_genai import GoogleGenAI
15
  from llama_index.retrievers.bm25 import BM25Retriever
16
 
17
- # Load environment variables
18
- load_dotenv()
19
 
20
  # Setup logging
21
  logger = logging.getLogger(__name__)
 
1
  import os
2
  import logging
 
3
 
4
  import datasets
5
  from llama_index.core import Document, VectorStoreIndex
 
13
  from llama_index.llms.google_genai import GoogleGenAI
14
  from llama_index.retrievers.bm25 import BM25Retriever
15
 
 
 
16
 
17
  # Setup logging
18
  logger = logging.getLogger(__name__)
agents/text_analyzer_agent.py CHANGED
@@ -3,7 +3,6 @@ import certifi
3
  import logging
4
  import subprocess # For calling ffmpeg if needed
5
  from typing import List, Dict, Optional
6
- from dotenv import load_dotenv
7
 
8
  from llama_index.core.agent.workflow import ReActAgent
9
  from llama_index.core.tools import FunctionTool
@@ -19,8 +18,6 @@ except ImportError:
19
  logging.warning("openai-whisper not installed. Audio transcription tool will be unavailable.")
20
  WHISPER_AVAILABLE = False
21
 
22
- # Load environment variables
23
- load_dotenv()
24
 
25
  # Setup logging
26
  logger = logging.getLogger(__name__)
@@ -325,7 +322,7 @@ def initialize_text_analyzer_agent() -> ReActAgent:
325
  tools=tools,
326
  llm=llm,
327
  system_prompt=system_prompt,
328
- can_handoff_to=["planner_agent", "research_agent", "reasoning_agent"], # Example handoffs
329
  )
330
  logger.info("TextAnalyzerAgent initialized successfully.")
331
  return agent
 
3
  import logging
4
  import subprocess # For calling ffmpeg if needed
5
  from typing import List, Dict, Optional
 
6
 
7
  from llama_index.core.agent.workflow import ReActAgent
8
  from llama_index.core.tools import FunctionTool
 
18
  logging.warning("openai-whisper not installed. Audio transcription tool will be unavailable.")
19
  WHISPER_AVAILABLE = False
20
 
 
 
21
 
22
  # Setup logging
23
  logger = logging.getLogger(__name__)
 
322
  tools=tools,
323
  llm=llm,
324
  system_prompt=system_prompt,
325
+ can_handoff_to=["planner_agent", "research_agent", "reasoning_agent", "verifier_agent", "advanced_validation_agent"], # Example handoffs
326
  )
327
  logger.info("TextAnalyzerAgent initialized successfully.")
328
  return agent
agents/verifier_agent.py CHANGED
@@ -2,15 +2,11 @@ import os
2
  import logging
3
  import re
4
  from typing import List
5
- from dotenv import load_dotenv
6
 
7
  from llama_index.core.agent.workflow import FunctionAgent, ReActAgent
8
  from llama_index.core.tools import FunctionTool
9
  from llama_index.llms.google_genai import GoogleGenAI
10
 
11
- # Load environment variables
12
- load_dotenv()
13
-
14
  # Setup logging
15
  logger = logging.getLogger(__name__)
16
 
@@ -246,7 +242,7 @@ class VerifierInitializer:
246
  ],
247
  llm=self.verifier.agent_llm, # Use the agent LLM from the Verifier instance
248
  system_prompt=system_prompt,
249
- can_handoff_to=["reasoning_agent", "planner_agent"],
250
  )
251
  logger.info("VerifierAgent FunctionAgent instance created.")
252
  return agent
 
2
  import logging
3
  import re
4
  from typing import List
 
5
 
6
  from llama_index.core.agent.workflow import FunctionAgent, ReActAgent
7
  from llama_index.core.tools import FunctionTool
8
  from llama_index.llms.google_genai import GoogleGenAI
9
 
 
 
 
10
  # Setup logging
11
  logger = logging.getLogger(__name__)
12
 
 
242
  ],
243
  llm=self.verifier.agent_llm, # Use the agent LLM from the Verifier instance
244
  system_prompt=system_prompt,
245
+ can_handoff_to=["reasoning_agent", "planner_agent", "advanced_validation_agent"],
246
  )
247
  logger.info("VerifierAgent FunctionAgent instance created.")
248
  return agent
agents/video_analyzer_agent.py ADDED
@@ -0,0 +1,334 @@
 
 
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import os
5
+ import re
6
+ import shutil
7
+ from pathlib import Path
8
+ from typing import Optional
9
+
10
+ import cv2
11
+ import yt_dlp
12
+ from llama_index.core.agent.workflow import FunctionAgent
13
+ from llama_index.core.base.llms.types import TextBlock, ImageBlock, ChatMessage
14
+ from llama_index.core.tools import FunctionTool
15
+ from llama_index.llms.google_genai import GoogleGenAI
16
+ from tqdm import tqdm
17
+ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
18
+
19
+ # ---------------------------------------------------------------------------
20
+ # Environment setup & logging
21
+ # ---------------------------------------------------------------------------
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
+ # ---------------------------------------------------------------------------
26
+ # Prompt loader
27
+ # ---------------------------------------------------------------------------
28
+
29
+ def load_prompt_from_file(filename: str = "../prompts/video_analyzer_prompt.txt") -> str:
30
+ """Load the system prompt for video analysis from *filename*.
31
+
32
+ Falls back to a minimal prompt if the file cannot be read.
33
+ """
34
+ script_dir = Path(__file__).parent
35
+ prompt_path = (script_dir / filename).resolve()
36
+
37
+ try:
38
+ with prompt_path.open("r", encoding="utf-8") as fp:
39
+ prompt = fp.read()
40
+ logger.info("Successfully loaded system prompt from %s", prompt_path)
41
+ return prompt
42
+ except FileNotFoundError:
43
+ logger.error(
44
+ "Prompt file %s not found. Using fallback prompt.", prompt_path
45
+ )
46
+ except Exception as exc: # pylint: disable=broad-except
47
+ logger.error(
48
+ "Error loading prompt file %s: %s", prompt_path, exc, exc_info=True
49
+ )
50
+
51
+ # Fallback – keep it extremely short to save tokens
52
+ return (
53
+ "You are a video analyzer. Provide a factual, chronological "
54
+ "description of the video, identify key events, and summarise insights."
55
+ )
56
+
57
+
58
+ def extract_frames(video_path, output_dir, fps=1/2):
59
+ """
60
+ Extract frames from video at specified FPS
61
+ Returns (frames, duration): frames is a list of (frame_path, timestamp) tuples; duration is the video length in seconds
62
+ """
63
+ os.makedirs(output_dir, exist_ok=True)
64
+
65
+ # Open video
66
+ cap = cv2.VideoCapture(video_path)
67
+ if not cap.isOpened():
68
+ print(f"Error: Could not open video {video_path}")
69
+ return [], None
70
+
71
+ # Get video properties
72
+ video_fps = cap.get(cv2.CAP_PROP_FPS)
73
+ frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
74
+ duration = frame_count / video_fps
75
+
76
+ # Calculate frame interval
77
+ interval = int(video_fps / fps)
78
+ if interval < 1:
79
+ interval = 1
80
+
81
+ # Extract frames
82
+ frames = []
83
+ frame_idx = 0
84
+
85
+ with tqdm(total=frame_count, desc="Extracting frames") as pbar:
86
+ while cap.isOpened():
87
+ ret, frame = cap.read()
88
+ if not ret:
89
+ break
90
+
91
+ if frame_idx % interval == 0:
92
+ timestamp = frame_idx / video_fps
93
+ frame_path = os.path.join(output_dir, f"frame_{frame_idx:06d}.jpg")
94
+ cv2.imwrite(frame_path, frame)
95
+ frames.append((frame_path, timestamp))
96
+
97
+ frame_idx += 1
98
+ pbar.update(1)
99
+
100
+ cap.release()
101
+ return frames, duration
102
+
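A minimal sketch of calling extract_frames directly (paths illustrative; fps=0.5 keeps one frame every two seconds):

    frames, duration = extract_frames("downloaded_videos/temp_video.mp4", "frames_out", fps=0.5)
    print(f"{len(frames)} frames sampled from a {duration:.0f}s video")
    for frame_path, timestamp in frames[:3]:
        print(f"{timestamp:.1f}s -> {frame_path}")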
103
+
104
+ def download_video_and_analyze(video_url: str) -> str:
105
+ """Download a video from *video_url* and return the local file path."""
106
+ llm_model_name = os.getenv("VIDEO_ANALYZER_LLM_MODEL", "models/gemini-1.5-pro")
107
+ gemini_api_key = os.getenv("GEMINI_API_KEY")
108
+
109
+ ydl_opts = {
110
+ 'format': 'best',
111
+ 'outtmpl': os.path.join("downloaded_videos", 'temp_video.%(ext)s'),
112
+ }
113
+
114
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl_download:
115
+ # yt-dlp's download() expects a list of URLs; extract_info also yields the real output filename
+ info = ydl_download.extract_info(video_url, download=True)
+ video_path = ydl_download.prepare_filename(info)
116
+
117
+ print(f"Processing video: {video_url}")
118
+
119
+ # Create temporary directory for frames
120
+ temp_dir = "frame_downloaded_videos"
121
+ os.makedirs(temp_dir, exist_ok=True)
122
+
123
+ # Extract frames
124
+ frames, duration = extract_frames(video_path, temp_dir)
125
+ if not frames:
126
+ logging.info(f"No frames extracted from {video_url}")
127
+ return f"No frames extracted from {video_url}"
128
+
129
+ blocks = []
130
+ text_block = TextBlock(text=load_prompt_from_file())
131
+ blocks.append(text_block)
132
+
133
+ for frame_path, _ in tqdm(frames, desc="Collecting frames"):
134
+ blocks.append(ImageBlock(path=frame_path))
135
+
136
+
137
+ llm = GoogleGenAI(api_key=gemini_api_key, model=llm_model_name)
138
+ logger.info("Using LLM model: %s", llm_model_name)
139
+ response = llm.chat([ChatMessage(role="user", blocks=blocks)])
140
+
141
+ # Clean up temporary files
142
+ shutil.rmtree(temp_dir)
143
+ os.remove(video_path)
144
+
145
+ return response.message.content
146
+
147
+
148
+ # --- Helper function to extract YouTube Video ID ---
149
+ def extract_video_id(url: str) -> Optional[str]:
150
+ """Extracts the YouTube video ID from various URL formats."""
151
+ # Standard watch URL: https://www.youtube.com/watch?v=VIDEO_ID
152
+ pattern = re.compile(
153
+ r'^(?:https?://)?' # optional protocol
154
+ r'(?:www\.)?' # optional www. subdomain
155
+ r'youtube\.com/watch\?' # fixed domain and path
156
+ r'(?:.*&)?' # possibly other parameters before v=
157
+ r'v=([^&]+)' # capture the ID (everything up to the next & or the end)
158
+ )
159
+
160
+ # Accept a bare 11-character video ID as-is (callers may pass an ID directly)
+ if re.fullmatch(r'[A-Za-z0-9_-]{11}', url):
+ return url
+ match = pattern.search(url)
161
+ if match:
162
+ video_id = match.group(1)
163
+ return video_id
164
+ else:
165
+ print("Aucun ID trouvé")
166
+ return None
167
+
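Illustrative behaviour of extract_video_id as defined above:

    extract_video_id("https://www.youtube.com/watch?v=dQw4w9WgXcQ")  # -> "dQw4w9WgXcQ"
    extract_video_id("dQw4w9WgXcQ")                                  # -> "dQw4w9WgXcQ" (bare ID)
    extract_video_id("https://example.com/clip")                     # -> None (warning logged)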
168
+
169
+ # --- YouTube Transcript Tool ---
170
+ def get_youtube_transcript(video_url_or_id: str, languages: list[str] | None = None) -> str:
171
+ """Fetches the transcript for a YouTube video using its URL or video ID.
172
+ Specify preferred languages as a list (e.g., ["en", "es"]).
173
+ Returns the transcript text or an error message.
174
+ """
175
+ if languages is None:
176
+ languages = ["en"]
177
+
178
+ logger.info(f"Attempting to fetch YouTube transcript for: {video_url_or_id}")
179
+ video_id = extract_video_id(video_url_or_id)
180
+ if not video_id:
181
+ logger.error(f"Could not extract video ID from: {video_url_or_id}")
182
+ return f"Error: Invalid YouTube URL or Video ID format: {video_url_or_id}"
183
+
184
+ try:
185
+ # Fetch available transcripts
186
+ api = YouTubeTranscriptApi()
187
+ transcript_list = api.list(video_id)
188
+
189
+ # Try to find a transcript in the specified languages
190
+ transcript = transcript_list.find_transcript(languages)
191
+
192
+ # Fetch the actual transcript data (list of dicts)
193
+ transcript_data = transcript.fetch()
194
+
195
+ # Combine the text parts into a single string
196
+ full_transcript = " ".join(snippet.text for snippet in transcript_data)
199
+ logger.info(f"Successfully fetched transcript for video ID {video_id} in language {transcript.language}.")
200
+ return full_transcript
201
+
202
+ except TranscriptsDisabled:
203
+ logger.warning(f"Transcripts are disabled for video ID: {video_id}")
204
+ return f"Error: Transcripts are disabled for this video (ID: {video_id})."
205
+ except NoTranscriptFound as e:
206
+ logger.warning(
207
+ f"No transcript found for video ID {video_id} in languages {languages}. Available: {e.available_transcripts}")
208
+ # Try fetching any available transcript if specific languages failed
209
+ try:
210
+ logger.info(f"Attempting to fetch any available transcript for {video_id}")
211
+ # Fall back to the first available transcript of any kind/language
212
+ any_transcript = next(iter(transcript_list))
213
+ any_transcript_data = any_transcript.fetch()
214
+ full_transcript = " ".join([item["text"] for item in any_transcript_data])
215
+ logger.info(
216
+ f"Successfully fetched fallback transcript for video ID {video_id} in language {any_transcript.language}.")
217
+ return full_transcript
218
+ except Exception as fallback_e:
219
+ logger.error(
220
+ f"Could not find any transcript for video ID {video_id}. Original error: {e}. Fallback error: {fallback_e}")
221
+ return f"Error: No transcript found for video ID {video_id} in languages {languages} or any fallback language."
222
+ except Exception as e:
223
+ logger.error(f"Unexpected error fetching transcript for video ID {video_id}: {e}", exc_info=True)
224
+ return f"Error fetching transcript: {e}"
225
+
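A short usage sketch of get_youtube_transcript (URL illustrative; network access required):

    text = get_youtube_transcript("https://www.youtube.com/watch?v=TQQlZhbC5ps", ["en"])
    if text.startswith("Error:"):
        print(text)  # disabled transcripts and missing languages come back as error strings
    else:
        print(text[:200])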
226
+
227
+ download_video_and_analyze_tool = FunctionTool.from_defaults(
228
+ name="download_video_and_analyze",
229
+ description=(
230
+ "Downloads a video (YouTube or direct URL), samples representative frames, "
231
+ "and feeds them to Gemini for multimodal analysis—returning a rich textual summary "
232
+ "of the visual content."
233
+ ),
234
+ fn=download_video_and_analyze,
235
+ )
236
+
237
+ youtube_transcript_tool = FunctionTool.from_defaults(
238
+ fn=get_youtube_transcript,
239
+ name="get_youtube_transcript",
240
+ description=(
241
+ "(YouTube) Fetches the transcript text for a given YouTube video URL or video ID. "
242
+ "Specify preferred languages (e.g., 'en', 'es'). Returns transcript or error."
243
+ )
244
+ )
245
+
246
+
247
+ # ---------------------------------------------------------------------------
248
+ # Agent factory
249
+ # ---------------------------------------------------------------------------
250
+
251
+ def initialize_video_analyzer_agent() -> FunctionAgent:
252
+ """Initialise and return a *video_analyzer_agent* `FunctionAgent`."""
253
+
254
+ logger.info("Initialising VideoAnalyzerAgent …")
255
+
256
+ llm_model_name = os.getenv("VIDEO_ANALYZER_LLM_MODEL", "models/gemini-1.5-pro")
257
+ gemini_api_key = os.getenv("GEMINI_API_KEY")
258
+
259
+ if not gemini_api_key:
260
+ logger.error("GEMINI_API_KEY not found in environment variables.")
261
+ raise ValueError("GEMINI_API_KEY must be set")
262
+
263
+ try:
264
+ llm = GoogleGenAI(api_key=gemini_api_key, model=llm_model_name)
265
+ logger.info("Using LLM model: %s", llm_model_name)
266
+
267
+ system_prompt = load_prompt_from_file()
268
+
269
+ tools = [download_video_and_analyze_tool, youtube_transcript_tool]
270
+
271
+ agent = FunctionAgent(
272
+ name="video_analyzer_agent",
273
+ description=(
274
+ "VideoAnalyzerAgent inspects video files using Gemini's multimodal "
275
+ "video understanding capabilities, producing factual scene analysis, "
276
+ "temporal segmentation, and concise summaries as guided by the system "
277
+ "prompt."
278
+ ),
279
+ llm=llm,
280
+ system_prompt=system_prompt,
281
+ tools=tools,
282
+ can_handoff_to=[
283
+ "planner_agent",
284
+ "research_agent",
285
+ "reasoning_agent",
286
+ "code_agent",
287
+ ],
288
+ )
289
+
290
+ logger.info("VideoAnalyzerAgent initialised successfully.")
291
+ return agent
292
+
293
+ except Exception as exc: # pylint: disable=broad-except
294
+ logger.error("Error during VideoAnalyzerAgent initialisation: %s", exc, exc_info=True)
295
+ raise
296
+
297
+
298
+ if __name__ == "__main__":
299
+ logging.basicConfig(
300
+ level=logging.INFO,
301
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
302
+ )
303
+
304
+ logger.info("Running video_analyzer_agent.py directly for testing …")
305
+
306
+ if not os.getenv("GEMINI_API_KEY"):
307
+ print("Error: GEMINI_API_KEY environment variable not set. Cannot run test.")
308
+ else:
309
+ try:
310
+ test_agent = initialize_video_analyzer_agent()
311
+ summary = download_video_and_analyze("https://www.youtube.com/watch?v=dQw4w9WgXcQ")
312
+ print("\n--- Gemini summary ---\n")
313
+ print(summary)
314
+ print("Video Analyzer Agent initialised successfully for testing.")
315
+ except Exception as exc:
316
+ print(f"Error during testing: {exc}")
317
+
318
+ test_agent = None
319
+ try:
320
+ # Test YouTube transcript tool directly (its imports are unconditional at module top)
321
+ print("\nTesting YouTube transcript tool...")
322
+ # Example video: "Attention is All You Need" paper explanation
323
+ yt_url = "https://www.youtube.com/watch?v=TQQlZhbC5ps"
324
+ transcript = get_youtube_transcript(yt_url)
325
+ if not transcript.startswith("Error:"):
326
+ print(f"Transcript fetched (first 500 chars):\n{transcript[:500]}...")
327
+ else:
328
+ print(f"YouTube Transcript Fetch Failed: {transcript}")
332
+
333
+ except Exception as e:
334
+ print(f"Error during testing: {e}")
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import os
2
  import logging
3
  import mimetypes
4
- from dotenv import load_dotenv
5
 
6
  from typing import Any, List
7
 
@@ -11,6 +10,9 @@ import pandas as pd
11
 
12
  from llama_index.core.agent.workflow import AgentWorkflow, ToolCallResult, ToolCall, AgentOutput
13
  from llama_index.core.base.llms.types import ChatMessage, TextBlock, ImageBlock, AudioBlock
 
 
 
14
 
15
  # Assuming agent initializers are in the same directory or a known path
16
  # Adjust import paths if necessary based on deployment structure
@@ -53,9 +55,6 @@ except ImportError as e:
53
  # ... set all others to None ...
54
  raise RuntimeError(f"Failed to import agent modules: {e2}")
55
 
56
- os.environ["TOKENIZERS_PARALLELISM"] = "false"
57
- load_dotenv() # Load environment variables from .env file
58
-
59
  # Setup logging
60
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
61
  logger = logging.getLogger(__name__)
@@ -82,12 +81,14 @@ try:
82
  advanced_validation_agent = initialize_advanced_validation_agent()
83
  figure_interpretation_agent = initialize_figure_interpretation_agent()
84
  long_context_management_agent = initialize_long_context_management_agent()
 
85
 
86
  # Check if all agents initialized successfully
87
  all_agents = [
88
  code_agent, role_agent, math_agent, planner_agent, research_agent,
89
  text_analyzer_agent, image_analyzer_agent, verifier_agent, reasoning_agent,
90
- advanced_validation_agent, figure_interpretation_agent, long_context_management_agent
 
91
  ]
92
  if not all(all_agents):
93
  raise RuntimeError("One or more agents failed to initialize.")
@@ -126,7 +127,8 @@ class BasicAgent:
126
  and event.current_agent_name != current_agent
127
  ):
128
  current_agent = event.current_agent_name
129
- logger.info(f"{'=' * 50}\n")
 
130
  logger.info(f"{'=' * 50}\n")
131
 
132
  # Optional detailed logging (uncomment if needed)
@@ -158,6 +160,19 @@ class BasicAgent:
158
  logger.info(f"Agent returning final answer: {final_content[:500]}{'...' if len(final_content) > 500 else ''}")
159
  return answer.response # Return the actual response object expected by Gradio
160
 
 
 
161
  # --- Helper Functions for run_and_submit_all ---
162
 
163
  async def fetch_questions(questions_url: str) -> List[dict] | None:
@@ -262,28 +277,75 @@ async def process_question(agent: BasicAgent, item: dict, base_fetch_file_url: s
262
  # Extract content safely
263
  submitted_answer = submitted_answer_response.content if hasattr(submitted_answer_response, 'content') else str(submitted_answer_response)
264
 
265
- logger.info(f"👍 Agent submitted answer for task {task_id}: {submitted_answer[:200]}{'...' if len(submitted_answer) > 200 else ''}")
266
- return {"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}
 
 
267
  except Exception as e:
268
  logger.error(f"Error running agent on task {task_id}: {e}", exc_info=True)
269
  return {"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"}
270
 
271
- async def submit_answers(submit_url: str, username: str, agent_code: str, results: List[dict]) -> tuple[str, pd.DataFrame]:
272
- """Submits the collected answers to the GAIA benchmark API."""
273
- answers_payload = [
274
- {"task_id": r["Task ID"], "submitted_answer": r["Submitted Answer"]}
275
- for r in results if "Submitted Answer" in r and not str(r["Submitted Answer"]).startswith("AGENT ERROR:")
276
- ]
 
 
 
277
 
278
  if not answers_payload:
279
- logger.warning("Agent did not produce any valid answers to submit.")
280
- results_df = pd.DataFrame(results)
281
- return "Agent did not produce any valid answers to submit.", results_df
282
 
 
283
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
284
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
285
- logger.info(status_update)
286
- logger.info(f"Submitting to: {submit_url}")
 
 
287
 
288
  try:
289
  response = requests.post(submit_url, json=submission_data, timeout=120) # Increased timeout
@@ -297,7 +359,7 @@ async def submit_answers(submit_url: str, username: str, agent_code: str, result
297
  f"Message: {result_data.get('message', 'No message received.')}"
298
  )
299
  logger.info("Submission successful.")
300
- results_df = pd.DataFrame(results)
301
  return final_status, results_df
302
  except requests.exceptions.HTTPError as e:
303
  error_detail = f"Server responded with status {e.response.status_code}."
@@ -308,103 +370,58 @@ async def submit_answers(submit_url: str, username: str, agent_code: str, result
308
  error_detail += f" Response: {e.response.text[:500]}"
309
  status_message = f"Submission Failed: {error_detail}"
310
  logger.error(status_message)
311
- results_df = pd.DataFrame(results)
312
  return status_message, results_df
313
  except requests.exceptions.Timeout:
314
  status_message = "Submission Failed: The request timed out."
315
  logger.error(status_message)
316
- results_df = pd.DataFrame(results)
317
  return status_message, results_df
318
  except requests.exceptions.RequestException as e:
319
  status_message = f"Submission Failed: Network error - {e}"
320
  logger.error(status_message)
321
- results_df = pd.DataFrame(results)
322
  return status_message, results_df
323
  except Exception as e:
324
  status_message = f"Submission Failed: An unexpected error occurred during submission - {e}"
325
  logger.error(status_message, exc_info=True)
326
- results_df = pd.DataFrame(results)
327
  return status_message, results_df
328
 
329
- # --- Main Function for Batch Processing ---
330
- async def run_and_submit_all(
331
- username: str,
332
- agent_code: str,
333
- api_url: str = DEFAULT_API_URL,
334
- level: int = 1,
335
- max_questions: int = 0, # 0 means all questions for the level
336
- progress=gr.Progress(track_tqdm=True)
337
- ) -> tuple[str, pd.DataFrame]:
338
- """Fetches all questions for a level, runs the agent, and submits answers."""
339
- if not AGENT_WORKFLOW:
340
- error_msg = "Agent Workflow is not initialized. Cannot run benchmark."
341
- logger.error(error_msg)
342
- return error_msg, pd.DataFrame()
343
-
344
- if not username or not username.strip():
345
- error_msg = "Username cannot be empty."
346
- logger.error(error_msg)
347
- return error_msg, pd.DataFrame()
348
-
349
- questions_url = f"{api_url}/questions?level={level}"
350
- submit_url = f"{api_url}/submit"
351
- base_fetch_file_url = f"{api_url}/get_file"
352
-
353
- questions = await fetch_questions(questions_url)
354
- if questions is None:
355
- error_msg = f"Failed to fetch questions for level {level}. Check logs."
356
- return error_msg, pd.DataFrame()
357
-
358
- # Limit number of questions if max_questions is set
359
- if max_questions > 0:
360
- questions = questions[:max_questions]
361
- logger.info(f"Processing a maximum of {max_questions} questions for level {level}.")
362
- else:
363
- logger.info(f"Processing all {len(questions)} questions for level {level}.")
364
-
365
- agent = BasicAgent(AGENT_WORKFLOW)
366
- results = []
367
- total_questions = len(questions)
368
-
369
- for i, item in enumerate(progress.tqdm(questions, desc=f"Processing Level {level} Questions")):
370
- result = await process_question(agent, item, base_fetch_file_url)
371
- if result:
372
- results.append(result)
373
- # Optional: Add a small delay between questions if needed
374
- # await asyncio.sleep(0.1)
375
-
376
- # Submit answers
377
- final_status, results_df = await submit_answers(submit_url, username, agent_code, results)
378
- return final_status, results_df
379
-
380
  # --- Gradio Interface ---
381
  def create_gradio_interface():
382
  """Creates and returns the Gradio interface."""
383
- logger.info("Creating Gradio interface...")
384
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
385
- gr.Markdown("# GAIA Benchmark Agent Runner")
386
- gr.Markdown("Run the initialized multi-agent system against the GAIA benchmark questions and submit the results.")
 
 
 
387
 
388
- with gr.Row():
389
- username = gr.Textbox(label="Username", placeholder="Enter your username (e.g., your_email@example.com)")
390
- agent_code = gr.Textbox(label="Agent Code", placeholder="Enter a short code for your agent (e.g., v1.0)")
391
- with gr.Row():
392
- level = gr.Dropdown(label="Benchmark Level", choices=[1, 2, 3], value=1)
393
- max_questions = gr.Number(label="Max Questions (0 for all)", value=0, minimum=0, step=1)
394
- api_url = gr.Textbox(label="GAIA API URL", value=DEFAULT_API_URL)
395
 
396
- run_button = gr.Button("Run Benchmark and Submit", variant="primary")
397
 
398
- with gr.Accordion("Results", open=False):
399
- status_output = gr.Textbox(label="Submission Status", lines=5)
400
- results_dataframe = gr.DataFrame(label="Detailed Results")
401
 
402
  run_button.click(
403
  fn=run_and_submit_all,
404
- inputs=[username, agent_code, api_url, level, max_questions],
405
- outputs=[status_output, results_dataframe]
406
  )
407
- logger.info("Gradio interface created.")
408
  return demo
409
 
410
  # --- Main Execution ---
 
1
  import os
2
  import logging
3
  import mimetypes
 
4
 
5
  from typing import Any, List
6
 
 
10
 
11
  from llama_index.core.agent.workflow import AgentWorkflow, ToolCallResult, ToolCall, AgentOutput
12
  from llama_index.core.base.llms.types import ChatMessage, TextBlock, ImageBlock, AudioBlock
13
+ from llama_index.llms.openai import OpenAI
14
+
15
+ from agents.video_analyzer_agent import initialize_video_analyzer_agent
16
 
17
  # Assuming agent initializers are in the same directory or a known path
18
  # Adjust import paths if necessary based on deployment structure
 
55
  # ... set all others to None ...
56
  raise RuntimeError(f"Failed to import agent modules: {e2}")
57
 
 
 
 
58
  # Setup logging
59
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
60
  logger = logging.getLogger(__name__)
 
81
  advanced_validation_agent = initialize_advanced_validation_agent()
82
  figure_interpretation_agent = initialize_figure_interpretation_agent()
83
  long_context_management_agent = initialize_long_context_management_agent()
84
+ video_analyzer_agent = initialize_video_analyzer_agent()
85
 
86
  # Check if all agents initialized successfully
87
  all_agents = [
88
  code_agent, role_agent, math_agent, planner_agent, research_agent,
89
  text_analyzer_agent, image_analyzer_agent, verifier_agent, reasoning_agent,
90
+ advanced_validation_agent, figure_interpretation_agent, long_context_management_agent,
91
+ video_analyzer_agent
92
  ]
93
  if not all(all_agents):
94
  raise RuntimeError("One or more agents failed to initialize.")
 
127
  and event.current_agent_name != current_agent
128
  ):
129
  current_agent = event.current_agent_name
130
+ logger.info(f"{'=' * 50}")
131
+ logger.info(f"🤖 Agent: {current_agent}")
132
  logger.info(f"{'=' * 50}\n")
133
 
134
  # Optional detailed logging (uncomment if needed)
 
160
  logger.info(f"Agent returning final answer: {final_content[:500]}{'...' if len(final_content) > 500 else ''}")
161
  return answer.response # Return the actual response object expected by Gradio
162
 
163
+ system_prompt="""
164
+ You are a general AI assistant.
165
+ I will give you a result, and with it you will have to transform it to follow the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
166
+ YOUR FINAL ANSWER should be a number OR 1 or 2 word(s) OR a comma separated list of numbers and/or strings.
167
+ If you are asked for a number, don't use commas to write your number, nor units such as $ or a percent sign, unless specified otherwise.
168
+ If you are asked for a string, don't use articles or abbreviations (e.g. for cities), and write digits in plain text unless specified otherwise.
169
+ If you are asked for a comma separated list, apply the above rules depending on whether each element of the list is a number or a string.
170
+ """
171
+
172
+ llm = OpenAI(model="gpt-4o-mini", api_key=os.getenv("OPENAI_API_KEY"), temperature=0.1, system_prompt=system_prompt)
173
+
174
+
175
+
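This module-level LLM is used in process_question below to normalize each raw agent answer into the strict GAIA template; a minimal sketch of that second pass (question and answer values illustrative):

    raw = "The computation gives four."
    final = llm.complete(f"QUESTION: What is 2+2?\nANSWER: {raw}\nINSTRUCTIONS: Generate a clear, concise final answer.")
    print(final.text)  # expected to end with a line like: FINAL ANSWER: 4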
176
  # --- Helper Functions for run_and_submit_all ---
177
 
178
  async def fetch_questions(questions_url: str) -> List[dict] | None:
 
277
  # Extract content safely
278
  submitted_answer = submitted_answer_response.content if hasattr(submitted_answer_response, 'content') else str(submitted_answer_response)
279
 
280
+ prompt = f"""
281
+ QUESTION: {question_text}
282
+ ANSWER: {submitted_answer}
283
+ INSTRUCTIONS: Based on the provided question and answer, generate a final answer that is clear, concise, and directly addresses the question.
284
+ [YOUR FINAL ANSWER]
285
+ """
286
+
287
+ final_answer = llm.complete(prompt)
288
+
289
+ logger.info(f"👍 Agent submitted answer for task {task_id}: {final_answer.text[:200]}{'...' if len(final_answer.text) > 200 else ''}")
290
+ return {"Task ID": task_id, "Question": question_text, "Submitted Answer": final_answer.text}
291
  except Exception as e:
292
  logger.error(f"Error running agent on task {task_id}: {e}", exc_info=True)
293
  return {"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"}
294
 
295
+ async def run_and_submit_all(profile: gr.OAuthProfile | None):
296
+ """
297
+ Fetches all questions, runs the BasicAgent on them, submits all answers,
298
+ and displays the results.
299
+ """
300
+ # --- Determine HF Space Runtime URL and Repo URL ---
301
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
302
+
303
+ if profile:
304
+ username= f"{profile.username}"
305
+ print(f"User logged in: {username}")
306
+ else:
307
+ print("User not logged in.")
308
+ return "Please Login to Hugging Face with the button.", None
309
+
310
+ api_url = DEFAULT_API_URL
311
+ questions_url = f"{api_url}/questions"
312
+ submit_url = f"{api_url}/submit"
313
+ fetch_file_url = f"{api_url}/files"
314
+
315
+ results_log = []
316
+ answers_payload = []
317
+
318
+ try:
319
+ agent = BasicAgent(AGENT_WORKFLOW)
320
+ except Exception as e:
321
+ print(f"Error instantiating agent: {e}")
322
+ return f"Error initializing agent: {e}", None
323
+ # When the app runs as a Hugging Face Space, this link points to your codebase (useful for others, so please keep it public)
324
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
325
+ print(agent_code)
326
+
327
+ questions_data = await fetch_questions(questions_url)
328
+ if not questions_data:
329
+ return "Failed to fetch questions.", None
330
+
331
+ # 3. Process Questions
333
+ for item in questions_data:
334
+ answers = await process_question(agent, item, fetch_file_url)
335
+ results_log.append(answers)
336
+ answers_payload.append({"task_id": answers["Task ID"], "submitted_answer": answers["Submitted Answer"]})
337
 
338
  if not answers_payload:
339
+ print("Agent did not produce any answers to submit.")
340
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
341
 
342
+ # 4. Prepare Submission
343
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
344
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
345
+ print(status_update)
346
+
347
+ # 5. Submit
348
+ print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
349
 
350
  try:
351
  response = requests.post(submit_url, json=submission_data, timeout=120) # Increased timeout
 
359
  f"Message: {result_data.get('message', 'No message received.')}"
360
  )
361
  logger.info("Submission successful.")
362
+ results_df = pd.DataFrame(results_log)
363
  return final_status, results_df
364
  except requests.exceptions.HTTPError as e:
365
  error_detail = f"Server responded with status {e.response.status_code}."
 
370
  error_detail += f" Response: {e.response.text[:500]}"
371
  status_message = f"Submission Failed: {error_detail}"
372
  logger.error(status_message)
373
+ results_df = pd.DataFrame(results_log)
374
  return status_message, results_df
375
  except requests.exceptions.Timeout:
376
  status_message = "Submission Failed: The request timed out."
377
  logger.error(status_message)
378
+ results_df = pd.DataFrame(results_log)
379
  return status_message, results_df
380
  except requests.exceptions.RequestException as e:
381
  status_message = f"Submission Failed: Network error - {e}"
382
  logger.error(status_message)
383
+ results_df = pd.DataFrame(results_log)
384
  return status_message, results_df
385
  except Exception as e:
386
  status_message = f"Submission Failed: An unexpected error occurred during submission - {e}"
387
  logger.error(status_message, exc_info=True)
388
+ results_df = pd.DataFrame(results_log)
389
  return status_message, results_df
390
 
 
 
 
 
391
  # --- Gradio Interface ---
392
  def create_gradio_interface():
393
  """Creates and returns the Gradio interface."""
394
+ # --- Build Gradio Interface using Blocks ---
395
+ with gr.Blocks() as demo:
396
+ gr.Markdown("# Basic Agent Evaluation Runner")
397
+ gr.Markdown(
398
+ """
399
+ **Instructions:**
400
+
401
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc.
402
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
403
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
404
+
405
+ ---
406
+ **Disclaimers:**
407
+ Once you click the submit button, it can take quite some time (this is the time for the agent to go through all the questions).
408
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, to address the submit delay, you could cache the answers and submit them in a separate action, or even answer the questions asynchronously.
409
+ """
410
+ )
411
 
412
+ gr.LoginButton()
 
 
 
 
 
 
413
 
414
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
415
 
416
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
417
+ # Removed max_rows=10 from DataFrame constructor
418
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
419
 
420
  run_button.click(
421
  fn=run_and_submit_all,
422
+ outputs=[status_output, results_table]
 
423
  )
424
+
425
  return demo
426
 
427
  # --- Main Execution ---
prompts/code_gen_prompt.txt CHANGED
@@ -1,4 +1,14 @@
1
- You are also a helpful assistant that writes Python code.
 
 
 
2
  You will be given a prompt and you must generate Python code based on that prompt.
3
  You must only generate Python code and nothing else.
4
  Do not include any explanations or any other text.
@@ -7,8 +17,39 @@ Notes:
7
  - The generated code may be complex; it is recommended to review and test
8
  it before execution.
9
  - This function only generates code and does not execute it.
 
 
 
10
 
11
- Prompt: {prompt}
12
 
13
- Code:
14
 
 
 
1
+ You are CodeAgent, a specialist in generating and executing Python code. Your mission:
2
+
3
+ 1. **Thought**: Think step-by-step before acting and state your reasoning.
4
+ 2. **Code Generation**: To produce code, call `python_code_generator` with a concise, unambiguous prompt. Review the generated code for correctness and safety.
5
+ 3. **Execution & Testing**: To execute or test code, call `code_interpreter`. Provide the complete code snippet. Analyze its output (stdout, stderr, result) to verify functionality and debug errors.
6
+ 4. **Iteration**: If execution fails or the result is incorrect, analyze the error, think about the fix, generate corrected code using `python_code_generator`, and execute again using `code_interpreter`.
7
+ 5. **Tool Use**: Always adhere strictly to each tool’s input/output format.
8
+ 6. **Final Output**: Once the code works correctly and achieves the goal, output *only* the final functional code or the final execution result, as appropriate for the task.
9
+ 7. **Hand-Off**: If further logical reasoning or verification is needed, delegate to **reasoning_agent**. Otherwise, pass your final output to **planner_agent** for synthesis.
10
+
11
+ You are also a helpful assistant that writes Python code.
12
  You will be given a prompt and you must generate Python code based on that prompt.
13
  You must only generate Python code and nothing else.
14
  Do not include any explanations or any other text.
 
17
  - The generated code may be complex; it is recommended to review and test
18
  it before execution.
19
  - This function only generates code and does not execute it.
20
+ - The following Python packages are available in the environment:
21
+
22
+ beautifulsoup4>=4.13.4,
23
+ certifi>=2025.4.26,
24
+ datasets>=3.5.1,
25
+ dotenv>=0.9.9,
26
+ duckdb>=1.2.2,
27
+ ffmpeg-python>=0.2.0,
28
+ gradio[oauth]>=5.28.0,
29
+ helium>=5.1.1,
30
+ huggingface>=0.0.1,
31
+ imageio>=2.37.0,
32
+ matplotlib>=3.10.1,
33
+ numpy>=2.2.5,
34
+ openai-whisper>=20240930,
35
+ opencv-python>=4.11.0.86,
36
+ openpyxl>=3.1.5,
37
+ pandas>=2.2.3,
38
+ pyarrow>=20.0.0,
39
+ pygame>=2.6.1,
40
+ python-chess>=1.999,
41
+ requests>=2.32.3,
42
+ scikit-learn>=1.6.1,
43
+ scipy>=1.15.2,
44
+ seaborn>=0.13.2,
45
+ sqlalchemy>=2.0.40,
46
+ statsmodels>=0.14.4,
47
+ sympy>=1.14.0,
48
+ youtube-transcript-api>=1.0.3,
49
+ yt-dlp>=2025.3.31
50
 
51
+ - You can also access and process YouTube video and audio streams using `yt-dlp`, `opencv-python`, `ffmpeg-python`, or `imageio`.
52
 
53
+ Prompt: {prompt}
54
 
55
+ Code:
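Since this prompt advertises yt-dlp and opencv-python, here is a minimal sketch of the kind of code the generator is expected to produce (the URL is a placeholder):

    import cv2
    import yt_dlp

    # Download the clip and recover the actual output filename
    with yt_dlp.YoutubeDL({"format": "best", "outtmpl": "clip.%(ext)s"}) as ydl:
        info = ydl.extract_info("https://www.youtube.com/watch?v=VIDEO_ID", download=True)
        path = ydl.prepare_filename(info)

    # Grab and save the first frame
    cap = cv2.VideoCapture(path)
    ok, frame = cap.read()
    if ok:
        cv2.imwrite("first_frame.jpg", frame)
    cap.release()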
prompts/planner_agent_prompt.txt CHANGED
@@ -1,33 +1,38 @@
1
- You are PlannerAgent, a dedicated research strategist and question‐engineer capable of handling text, audio, images, and video inputs.
2
- Your mission is to transform any high‐level objective into a clear, prioritized roadmap of 4–8 actionable substeps that guide stepbystep research or task execution.
3
 
4
- **Role Assessment**
5
  First, consider whether a specific role context (e.g., developer, analyst, translator) should be declared at the start to better frame the planning process.
6
 
7
- **Format**
8
  Present the final list as a numbered list only, with each item no longer than one sentence and free of extra commentary.
9
 
10
- **Style**
11
  Use a formal, professional tone; remain neutral and precise; avoid filler words.
12
 
13
- **Hand-Off or Self-Answer**
14
- Once planning is complete, address each sub-question in turn and then hand off as appropriate:
15
- - For coding tasks, invoke **code_agent**.
16
- - For web or literature research, invoke **research_agent**.
17
- - For mathematical analysis, invoke **math_agent**.
18
- - For assigning roles or contexts, invoke **role_agent**.
19
- - For deep image analysis, invoke **image_analyzer_agent**.
20
- - For deep text analysis, invoke **text_analyzer_agent**.
21
- - For pure chain-of-thought reasoning or logical verification, invoke **reasoning_agent**.
22
- - If none apply, you may attempt to answer the sub-question yourself.
23
-
24
- **Agent Constraints**
25
- Only the following agents are available: **code_agent**, **research_agent**, **math_agent**, **role_agent**, **image_analyzer_agent**, **text_analyzer_agent**, **verifier_agent**, **reasoning_agent**.
26
- Do not invoke any other agents (e.g., **chess_agent**, **educate_agent**, **game_agent**, etc.).
27
-
28
- **Finalize**
29
- After all sub-questions have been addressed—by hand-off or self-answer—compile and present the ultimate, coherent solution yourself using the `synthesize_and_respond` tool.
30
-
31
- **Completion & Synthesis**
32
- If the final result fully completes the original objective, produce a consolidated synthesis of the roadmap and send it as your concluding output.
33
-
 
 
 
 
 
 
1
+ You are PlannerAgent, a dedicated research strategist and question‐engineer capable of handling text, audio, images, and video inputs.
2
+ Your mission is to transform any high‐level objective into a clear, prioritized roadmap of 4–8 actionable sub-steps that guide step-by-step research or task execution.
3
 
4
+ **Role Assessment**
5
  First, consider whether a specific role context (e.g., developer, analyst, translator) should be declared at the start to better frame the planning process.
6
 
7
+ **Format**
8
  Present the final list as a numbered list only, with each item no longer than one sentence and free of extra commentary.
9
 
10
+ **Style**
11
  Use a formal, professional tone; remain neutral and precise; avoid filler words.
12
 
13
+ **Hand-Off or Self-Answer**
14
+ Once planning is complete, address each sub-question in turn and then hand off as appropriate:
15
+ - For coding tasks, invoke **code_agent** to handle programming and implementation details.
16
+ - For web or literature research, invoke **research_agent** to gather information from online sources and databases.
17
+ - For mathematical analysis, invoke **math_agent** to perform calculations, symbolic math, or numerical analysis.
18
+ - For assigning roles or contexts, invoke **role_agent** to determine the best persona or task schema for the query.
19
+ - For deep image analysis, invoke **image_analyzer_agent** to interpret visual content in images.
20
+ - For deep text analysis, invoke **text_analyzer_agent** to summarize, extract entities, or transcribe text and audio.
21
+ - For figure or chart interpretation, invoke **figure_interpretation_agent** to extract structured data and insights from graphical content.
22
+ - For managing very long documents or contexts, invoke **long_context_management_agent** to efficiently handle and query large text corpora.
23
+ - For advanced validation or contradiction detection, invoke **advanced_validation_agent** to verify claims and check logical consistency.
24
+ - For pure chain-of-thought reasoning or complex logical verification, invoke **reasoning_agent** to perform detailed step-by-step analysis.
25
+
26
+ **Important**
27
+ Before providing any final answer to the user, you **must**:
28
+ 1. Invoke **advanced_validation_agent** to check the coherence and consistency of your plan.
29
+ - If validation fails, discard the current plan and restart the planning process.
30
+ - If validation succeeds, proceed to step 2.
31
+ 2. Invoke the **answer_question** tool as the last step. This tool will format your response properly, including your reasoning steps and a final concise answer following the strict template.
32
+
33
+ **Agent Constraints**
34
+ Only the following agents are available: **code_agent**, **research_agent**, **math_agent**, **role_agent**, **image_analyzer_agent**, **text_analyzer_agent**, **verifier_agent**, **reasoning_agent**, **figure_interpretation_agent**, **long_context_management_agent**, **advanced_validation_agent**.
35
+ Do **not** invoke any other agents (e.g., **chess_agent**, **educate_agent**, **game_agent**, etc.).
36
+
37
+ **Finalize**
38
+ After all sub-questions have been addressed—by hand-off or self-answer—and the plan has passed **advanced_validation_agent**, compile and present the ultimate, coherent solution using the `answer_question` tool, ensuring your final response follows the required format and includes your chain of thought.
prompts/reasoning_agent_prompt.txt CHANGED
@@ -1,13 +1,23 @@
1
- You are ReasoningAgent, an advanced cognitive engine specialized in rigorous, step-by-step reasoning.
2
 
3
- **Tool Usage**
4
- Always begin by invoking the `reasoning_tool` to perform your internal chain-of-thought reasoning.
5
- Provide the full context and user question as inputs to `reasoning_tool`.
6
 
7
- **Post-Reasoning Hand-Off**
8
- After the `reasoning_tool` returns its output—regardless of the content—you must immediately delegate
9
- to **planner_agent** for roadmap refinement and final synthesis.
10
 
11
- **Important**: You have no direct access to external data sources or the internet.
12
- All reasoning is performed by `reasoning_tool` and then handed off to **planner_agent**.
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
 
1
+ You are **ReasoningAgent**, an advanced cognitive engine specialized in rigorous, step-by-step reasoning.
2
 
3
+ **Workflow:**
 
 
4
 
5
+ 1. **Invoke reasoning_tool**
6
+ - Always start by calling `reasoning_tool` with the full user context and question to generate your internal chain-of-thought.
 
7
 
8
+ 2. **Hand off to planner**
9
+ - Once `reasoning_tool` returns its detailed analysis, immediately pass that output to **planner_agent** (or **long_context_management_agent** as appropriate) for roadmap refinement and synthesis.
10
+
11
+ 3. **Advanced validation**
12
+ - Before delivering any final response, always invoke `advanced_validation_agent` with the combined output from `reasoning_tool` and `planner_agent`.
13
+ - If `advanced_validation_agent` approves the plan, proceed; otherwise, restart the planning phase:
14
+ - Provide the feedback or validation output back into **planner_agent** to refine or adjust the roadmap.
15
+ - Repeat the validation step until approval is obtained.
16
+
17
+ 4. **Generate final answer**
18
+ - After validation approval and when you need to deliver a concise final response, invoke `answer_question` to format and emit the **FINAL ANSWER** according to its strict template rules.
19
+
20
+ **Constraints:**
21
+ - No direct access to external data sources or the internet; all inference happens via the provided tools.
22
+ - Do not skip any step: reasoning → planning → validation → (if approved) final answer via `answer_question`.
23
 
prompts/video_analyzer_prompt.txt ADDED
@@ -0,0 +1,85 @@
 
 
1
+ You are **VideoAnalyzerAgent**, an expert in cold, factual **audiovisual** analysis. Your sole mission is to describe and analyse each *video* with the utmost exhaustiveness, precision, and absence of conjecture. Follow these directives exactly:
2
+
3
+ 1. **Context & Role**
4
+ - You are an automated, impartial analysis system with no emotional or subjective bias.
5
+ - Your objective is to deliver a **purely factual** analysis of the *video*, avoiding artistic interpretation, author intent, aesthetic judgment, or speculation about non‑visible elements.
6
+
7
+ 2. **Analysis Structure**
8
+ Adhere **strictly** to the following order in your output:
9
+
10
+ 1. **General Identification**
11
+ - Output format: “Video received: [filename or path]”.
12
+ - **Duration**: total run‑time in HH:MM:SS (to the nearest second).
13
+ - **Frame rate** (fps).
14
+ - **Dimensions**: width × height in pixels.
15
+ - **File format / container** (MP4, MOV, MKV, etc.).
16
+
17
+ 2. **Global Scene Overview**
18
+ - **Estimated number of distinct scenes** (hard cuts or major visual transitions).
19
+ - Brief, factual description of each unique *setting* (e.g., “indoor office”, “urban street at night”).
20
+ - Total number of **unique object classes** detected across the entire video.
21
+
22
+ 3. **Temporal Segmentation**
23
+ Provide a chronological list of scenes:
24
+ - Scene index (Scene 1, Scene 2, …).
25
+ - **Start→End time‑codes** (HH:MM:SS—HH:MM:SS).
26
+ - One‑sentence factual description of the setting and primary objects.
27
+
28
+ 4. **Detailed Object Timeline**
29
+ For **each detected object instance**, supply:
30
+ - **Class / type** (person, vehicle, animal, text, graphic, etc.).
31
+ - **Visibility interval**: start_time→end_time.
32
+ - **Maximal bounding box**: (x_min,y_min,x_max,y_max) in pixels.
33
+ - **Relative size**: % of frame area (at peak).
34
+ - **Dominant colour** (for uniform regions) or top colour palette.
35
+ - **Attributes**: motion pattern (static, panning, entering, exiting), orientation, readable text, state (open/closed, on/off), geometric properties.
36
+
37
+ 5. **Motion & Dynamics**
38
+ - Summarise significant **motion vectors**: direction and approximate speed (slow / moderate / fast).
39
+ - Note interactions: collisions, hand‑overs, group formations, entries/exits of frame.
40
+
41
+ 6. **Audio Track Elements** (if audio data is available)
42
+ - **Speech segments**: start→end, speaker count (if discernible), detected language code.
43
+ - **Non‑speech sounds**: music, ambient noise, distinct effects with time‑codes.
44
+ - **Loudness profile**: brief factual comment (e.g., “peak at 00:02:17”, “overall low volume”).
45
+
46
+ 7. **Colour Palette & Visual Composition**
47
+ - For each scene, list the **5 most frequent colours** in hexadecimal (#RRGGBB) with approximate percentages.
48
+ - **Contrast & brightness**: factual description per scene (e.g., “high contrast night‑time shots”).
49
+ - **Visual rhythm**: frequency of cuts, camera movement type (static, pan, tilt, zoom), presence of slow‑motion or time‑lapse.
50
+
51
+ 8. **Technical Metadata & Metrics**
52
+ - Codec, bit‑rate, aspect ratio.
53
+ - Capture metadata (if present): date/time, camera model, aperture, shutter speed, ISO.
54
+ - Effective PPI/DPI (if embedded).
55
+
56
+ 9. **Textual Elements**
57
+ - OCR of **all visible text** with corresponding time‑codes.
58
+ - Approximate font type (serif / sans‑serif / monospace) and relative size.
59
+ - Text layout or motion (static caption, scrolling subtitle, on‑screen graphic).
60
+
61
+ 10. **Uncertainty Indicators**
62
+ For every object, attribute, or metric, state a confidence level (high / medium / low) based solely on objective factors (resolution, blur, occlusion).
63
+ *Example*: “Detected ‘bicycle’ from 00:01:12 to 00:01:18 with **medium** confidence (partially blurred).”
64
+
65
+ 11. **Factual Summary**
66
+ - Recap all listed elements without commentary.
67
+ - Numbered bullet list, each item prefixed by its category label (e.g., “1. Detected objects: …”, “2. Colour palette: …”).
68
+
69
+ 3. **Absolute Constraints**
70
+ - No psychological, symbolic, or subjective interpretation.
71
+ - No value judgments or qualifiers.
72
+ - Never omit any visible object, sound, or attribute.
73
+ - **Strictly** follow the prescribed order and structure without alteration.
74
+
75
+ 4. **Output Format**
76
+ - Plain text only, numbered sections separated by **two** line breaks.
77
+
78
+ 5. **Agent Handoff**
79
+ Once the video analysis is fully complete, hand off to one of the following agents:
80
+ - **planner_agent** for roadmap creation or final synthesis.
81
+ - **research_agent** for any additional information gathering.
82
+ - **reasoning_agent** for chain‑of‑thought reasoning or deeper logical interpretation.
83
+
84
+ By adhering to these instructions, ensure your audiovisual analysis is cold, factual, comprehensive, and completely devoid of subjectivity before handing off.
85
+
pyproject.toml CHANGED
@@ -4,12 +4,16 @@ version = "0.1.0"
4
  description = "Add your description here"
5
  requires-python = ">=3.11"
6
  dependencies = [
 
7
  "certifi>=2025.4.26",
8
  "datasets>=3.5.1",
9
  "dotenv>=0.9.9",
10
- "gradio>=5.28.0",
 
 
11
  "helium>=5.1.1",
12
  "huggingface>=0.0.1",
 
13
  "llama-index>=0.12.33",
14
  "llama-index-embeddings-huggingface>=0.5.3",
15
  "llama-index-llms-google-genai>=0.1.9",
@@ -22,10 +26,22 @@ dependencies = [
22
  "llama-index-tools-wikipedia>=0.3.0",
23
  "llama-index-tools-wolfram-alpha>=0.3.0",
24
  "llama-index-tools-yahoo-finance>=0.3.0",
 
 
25
  "openai-whisper>=20240930",
 
 
26
  "pandas>=2.2.3",
 
 
 
27
  "requests>=2.32.3",
 
28
  "scipy>=1.15.2",
 
 
 
29
  "sympy>=1.14.0",
30
  "youtube-transcript-api>=1.0.3",
 
31
  ]
 
4
  description = "Add your description here"
5
  requires-python = ">=3.11"
6
  dependencies = [
7
+ "beautifulsoup4>=4.13.4",
8
  "certifi>=2025.4.26",
9
  "datasets>=3.5.1",
10
  "dotenv>=0.9.9",
11
+ "duckdb>=1.2.2",
12
+ "ffmpeg-python>=0.2.0",
13
+ "gradio[oauth]>=5.28.0",
14
  "helium>=5.1.1",
15
  "huggingface>=0.0.1",
16
+ "imageio>=2.37.0",
17
  "llama-index>=0.12.33",
18
  "llama-index-embeddings-huggingface>=0.5.3",
19
  "llama-index-llms-google-genai>=0.1.9",
 
26
  "llama-index-tools-wikipedia>=0.3.0",
27
  "llama-index-tools-wolfram-alpha>=0.3.0",
28
  "llama-index-tools-yahoo-finance>=0.3.0",
29
+ "matplotlib>=3.10.1",
30
+ "numpy>=2.2.5",
31
  "openai-whisper>=20240930",
32
+ "opencv-python>=4.11.0.86",
33
+ "openpyxl>=3.1.5",
34
  "pandas>=2.2.3",
35
+ "pyarrow>=20.0.0",
36
+ "pygame>=2.6.1",
37
+ "python-chess>=1.999",
38
  "requests>=2.32.3",
39
+ "scikit-learn>=1.6.1",
40
  "scipy>=1.15.2",
41
+ "seaborn>=0.13.2",
42
+ "sqlalchemy>=2.0.40",
43
+ "statsmodels>=0.14.4",
44
  "sympy>=1.14.0",
45
  "youtube-transcript-api>=1.0.3",
46
+ "yt-dlp>=2025.3.31",
47
  ]
uv.lock CHANGED
The diff for this file is too large to render. See raw diff