Aedelon committed
Commit b8f6b7f · verified · 1 Parent(s): a23082c

agent enhancement (#3)

- Agent Improvement (6caec8d95ae9906aea5eb0e0465545da0450e97e)
- Agent Improvement (69c2791ac76a8b773bcb069d9453d775c101ae26)
Files changed (36)
  1. .env +3 -1
  2. __pycache__/app.cpython-311.pyc +0 -0
  3. agents/__pycache__/__init__.cpython-311.pyc +0 -0
  4. agents/__pycache__/advanced_validation_agent.cpython-311.pyc +0 -0
  5. agents/__pycache__/code_agent.cpython-311.pyc +0 -0
  6. agents/__pycache__/figure_interpretation_agent.cpython-311.pyc +0 -0
  7. agents/__pycache__/image_analyzer_agent.cpython-311.pyc +0 -0
  8. agents/__pycache__/long_context_management_agent.cpython-311.pyc +0 -0
  9. agents/__pycache__/math_agent.cpython-311.pyc +0 -0
  10. agents/__pycache__/planner_agent.cpython-311.pyc +0 -0
  11. agents/__pycache__/reasoning_agent.cpython-311.pyc +0 -0
  12. agents/__pycache__/research_agent.cpython-311.pyc +0 -0
  13. agents/__pycache__/role_agent.cpython-311.pyc +0 -0
  14. agents/__pycache__/text_analyzer_agent.cpython-311.pyc +0 -0
  15. agents/__pycache__/verifier_agent.cpython-311.pyc +0 -0
  16. agents/__pycache__/video_analyzer_agent.cpython-311.pyc +0 -0
  17. agents/advanced_validation_agent.py +0 -5
  18. agents/code_agent.py +37 -12
  19. agents/figure_interpretation_agent.py +0 -5
  20. agents/image_analyzer_agent.py +1 -5
  21. agents/long_context_management_agent.py +5 -5
  22. agents/math_agent.py +25 -8
  23. agents/planner_agent.py +76 -14
  24. agents/reasoning_agent.py +71 -11
  25. agents/research_agent.py +91 -156
  26. agents/role_agent.py +0 -3
  27. agents/text_analyzer_agent.py +1 -4
  28. agents/verifier_agent.py +1 -5
  29. agents/video_analyzer_agent.py +334 -0
  30. app.py +110 -93
  31. prompts/code_gen_prompt.txt +44 -3
  32. prompts/planner_agent_prompt.txt +31 -26
  33. prompts/reasoning_agent_prompt.txt +19 -9
  34. prompts/video_analyzer_prompt.txt +85 -0
  35. pyproject.toml +17 -1
  36. uv.lock +0 -0
.env CHANGED
@@ -6,11 +6,13 @@ GOOGLE_API_KEY="AIzaSyACcl4uzlyqz4glW-_uCj0xGPSSH0uloAY" # For Google Custom Search API
 GOOGLE_CSE_ID="004c6b8673f0c4dd5" # For Google Custom Search Engine ID
 TAVILY_API_KEY="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" # For Tavily Search API
 ALPAFLOW_OPENAI_API_KEY="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" # For o4-mini model (or other OpenAI compatible endpoint)
-WOLFRAM_ALPHA_APP_ID="YOUR_WOLFRAM_ALPHA_APP_ID" # For WolframAlpha API
+WOLFRAM_ALPHA_APP_ID="Y7YG2L-TEU4RGXRVG" # For WolframAlpha API
 
 # GAIA Benchmark API
 GAIA_API_URL="https://agents-course-unit4-scoring.hf.space"
 
+LLM_MODEL="models/gemini-1.5-pro"
+
 # Model Names (using defaults from original code, can be overridden)
 ROLE_EMBED_MODEL="Snowflake/snowflake-arctic-embed-l-v2.0"
 ROLE_RERANKER_MODEL="Alibaba-NLP/gte-multilingual-reranker-base"
__pycache__/app.cpython-311.pyc ADDED
Binary file (28 kB).
 
agents/__pycache__/__init__.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/__init__.cpython-311.pyc and b/agents/__pycache__/__init__.cpython-311.pyc differ
 
agents/__pycache__/advanced_validation_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/advanced_validation_agent.cpython-311.pyc and b/agents/__pycache__/advanced_validation_agent.cpython-311.pyc differ
 
agents/__pycache__/code_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/code_agent.cpython-311.pyc and b/agents/__pycache__/code_agent.cpython-311.pyc differ
 
agents/__pycache__/figure_interpretation_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/figure_interpretation_agent.cpython-311.pyc and b/agents/__pycache__/figure_interpretation_agent.cpython-311.pyc differ
 
agents/__pycache__/image_analyzer_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/image_analyzer_agent.cpython-311.pyc and b/agents/__pycache__/image_analyzer_agent.cpython-311.pyc differ
 
agents/__pycache__/long_context_management_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/long_context_management_agent.cpython-311.pyc and b/agents/__pycache__/long_context_management_agent.cpython-311.pyc differ
 
agents/__pycache__/math_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/math_agent.cpython-311.pyc and b/agents/__pycache__/math_agent.cpython-311.pyc differ
 
agents/__pycache__/planner_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/planner_agent.cpython-311.pyc and b/agents/__pycache__/planner_agent.cpython-311.pyc differ
 
agents/__pycache__/reasoning_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/reasoning_agent.cpython-311.pyc and b/agents/__pycache__/reasoning_agent.cpython-311.pyc differ
 
agents/__pycache__/research_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/research_agent.cpython-311.pyc and b/agents/__pycache__/research_agent.cpython-311.pyc differ
 
agents/__pycache__/role_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/role_agent.cpython-311.pyc and b/agents/__pycache__/role_agent.cpython-311.pyc differ
 
agents/__pycache__/text_analyzer_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/text_analyzer_agent.cpython-311.pyc and b/agents/__pycache__/text_analyzer_agent.cpython-311.pyc differ
 
agents/__pycache__/verifier_agent.cpython-311.pyc CHANGED
Binary files a/agents/__pycache__/verifier_agent.cpython-311.pyc and b/agents/__pycache__/verifier_agent.cpython-311.pyc differ
 
agents/__pycache__/video_analyzer_agent.cpython-311.pyc ADDED
Binary file (17 kB).
 
agents/advanced_validation_agent.py CHANGED
@@ -2,16 +2,12 @@ import os
 import logging
 import json
 from typing import List, Dict, Optional, Union
-from dotenv import load_dotenv
 
 from llama_index.core.agent.workflow import ReActAgent
 from llama_index.core.tools import FunctionTool
 from llama_index.llms.google_genai import GoogleGenAI
 # Assuming research_agent might be needed for handoff, but not directly imported
 
-# Load environment variables
-load_dotenv()
-
 # Setup logging
 logger = logging.getLogger(__name__)
 
@@ -347,7 +343,6 @@ def initialize_advanced_validation_agent() -> ReActAgent:
         llm=llm,
         system_prompt=system_prompt,
         can_handoff_to=valid_handoffs,
-        verbose=True # Enable verbose logging
     )
     logger.info("AdvancedValidationAgent initialized successfully.")
     return agent
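
Note: this commit removes the per-module `load_dotenv()` call from every agent file (here and in the diffs below), which only works if the environment is populated once before the agent modules are used. A minimal sketch of that pattern, assuming the entry point (app.py, also changed in this commit but not shown in full here) takes over the loading:

    # Sketch only; the actual app.py changes are not visible in this diff.
    import os
    from dotenv import load_dotenv

    load_dotenv()  # populate os.environ from .env exactly once, at startup

    from agents.advanced_validation_agent import initialize_advanced_validation_agent

    agent = initialize_advanced_validation_agent()  # reads os.getenv(...) lazily
    print(os.getenv("LLM_MODEL", "models/gemini-1.5-pro"))  # default mirrors .env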
agents/code_agent.py CHANGED
@@ -1,6 +1,5 @@
 import os
 import logging
-from dotenv import load_dotenv
 
 from llama_index.core.agent.workflow import CodeActAgent, ReActAgent
 from llama_index.core.tools import FunctionTool
@@ -8,9 +7,6 @@ from llama_index.llms.google_genai import GoogleGenAI
 from llama_index.llms.openai import OpenAI
 from llama_index.tools.code_interpreter import CodeInterpreterToolSpec
 
-# Load environment variables
-load_dotenv()
-
 # Setup logging
 logger = logging.getLogger(__name__)
 
@@ -47,12 +43,10 @@ def generate_python_code(prompt: str) -> str:
 
     # Configuration for code generation LLM
     gen_llm_model = os.getenv("CODE_GEN_LLM_MODEL", "o4-mini")
-    gen_api_key_env = os.getenv("CODE_GEN_API_KEY_ENV", "ALPAFLOW_OPENAI_API_KEY")
-    gen_api_key = os.getenv(gen_api_key_env)
+    gen_api_key = os.getenv("OPENAI_API_KEY")
 
     if not gen_api_key:
-        logger.error(f"{gen_api_key_env} not found in environment variables for code generation LLM.")
-        raise ValueError(f"{gen_api_key_env} must be set for code generation")
+        raise ValueError("OPENAI_API_KEY environment variable is not set.")
 
     # Load the prompt template
     default_gen_prompt_template = ("You are a helpful assistant that writes Python code. "
@@ -68,7 +62,10 @@ def generate_python_code(prompt: str) -> str:
     try:
         llm = OpenAI(
             model=gen_llm_model,
-            api_key=gen_api_key
+            api_key=gen_api_key,
+            reasoning_effort="high",
+            temperature=0.25,
+            max_tokens=16384
         )
         logger.info(f"Using code generation LLM: {gen_llm_model}")
         generated_code = llm.complete(input_prompt)
@@ -145,14 +142,42 @@ def initialize_code_agent() -> ReActAgent:
     6. **Final Output**: Once the code works correctly and achieves the goal, output *only* the final functional code or the final execution result, as appropriate for the task.
     7. **Hand-Off**: If further logical reasoning or verification is needed, delegate to **reasoning_agent**. Otherwise, pass your final output to **planner_agent** for synthesis.
     """
-    # system_prompt = load_prompt_from_file("code_agent_system_prompt.txt", default_system_prompt)
-    system_prompt = default_system_prompt # Using inline for now
+    system_prompt = load_prompt_from_file("code_agent_system_prompt.txt", default_system_prompt)
 
     agent = ReActAgent(
         name="code_agent",
         description=(
             "Generates Python code using `python_code_generator` and executes it safely using `code_interpreter`. "
-            "Iteratively debugs and refines code based on execution results."
+            "Iteratively debugs and refines code based on execution results. "
+            "The agent has access to the following Python packages:\n"
+            "- beautifulsoup4>=4.13.4\n"
+            "- certifi>=2025.4.26\n"
+            "- datasets>=3.5.1\n"
+            "- dotenv>=0.9.9\n"
+            "- duckdb>=1.2.2\n"
+            "- ffmpeg-python>=0.2.0\n"
+            "- gradio[oauth]>=5.28.0\n"
+            "- helium>=5.1.1\n"
+            "- huggingface>=0.0.1\n"
+            "- imageio>=2.37.0\n"
+            "- matplotlib>=3.10.1\n"
+            "- numpy>=2.2.5\n"
+            "- openai-whisper>=20240930\n"
+            "- opencv-python>=4.11.0.86\n"
+            "- openpyxl>=3.1.5\n"
+            "- pandas>=2.2.3\n"
+            "- pyarrow>=20.0.0\n"
+            "- pygame>=2.6.1\n"
+            "- python-chess>=1.999\n"
+            "- requests>=2.32.3\n"
+            "- scikit-learn>=1.6.1\n"
+            "- scipy>=1.15.2\n"
+            "- seaborn>=0.13.2\n"
+            "- sqlalchemy>=2.0.40\n"
+            "- statsmodels>=0.14.4\n"
+            "- sympy>=1.14.0\n"
+            "- youtube-transcript-api>=1.0.3\n"
+            "- yt-dlp>=2025.3.31"
         ),
         # REMOVED: code_execute_fn - Execution is handled by the code_interpreter tool via the agent loop.
         tools=[
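
Note: `generate_python_code` now reads the standard `OPENAI_API_KEY` and pins the generator's decoding parameters. A minimal standalone sketch of the same configuration, assuming `llama-index-llms-openai` is installed and the key is exported (`reasoning_effort` is only meaningful for reasoning-style models such as o4-mini):

    import os
    from llama_index.llms.openai import OpenAI

    llm = OpenAI(
        model=os.getenv("CODE_GEN_LLM_MODEL", "o4-mini"),
        api_key=os.environ["OPENAI_API_KEY"],  # fails fast if unset, like the diff's ValueError
        reasoning_effort="high",
        temperature=0.25,
        max_tokens=16384,
    )
    print(llm.complete("Write a one-line Python expression that reverses a string.").text)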
agents/figure_interpretation_agent.py CHANGED
@@ -1,16 +1,11 @@
 import os
 import logging
-from typing import List, Dict, Optional, Union
-from dotenv import load_dotenv
 
 from llama_index.core.agent.workflow import ReActAgent
 from llama_index.core.schema import ImageDocument
 from llama_index.core.tools import FunctionTool
 from llama_index.llms.google_genai import GoogleGenAI
 
-# Load environment variables
-load_dotenv()
-
 # Setup logging
 logger = logging.getLogger(__name__)
agents/image_analyzer_agent.py CHANGED
@@ -1,13 +1,9 @@
 import os
 import logging
-from dotenv import load_dotenv
 
 from llama_index.core.agent.workflow import FunctionAgent
 from llama_index.llms.google_genai import GoogleGenAI
 
-# Load environment variables
-load_dotenv()
-
 # Setup logging
 logger = logging.getLogger(__name__)
 
@@ -69,7 +65,7 @@ def initialize_image_analyzer_agent() -> FunctionAgent:
         system_prompt=system_prompt,
         # No explicit tools needed if relying on direct multimodal LLM call
         # tools=[],
-        can_handoff_to=["planner_agent", "research_agent", "reasoning_agent"],
+        can_handoff_to=["planner_agent", "research_agent", "reasoning_agent", "figure_interpretation_agent"],
     )
     logger.info("ImageAnalyzerAgent initialized successfully.")
     return agent
agents/long_context_management_agent.py CHANGED
@@ -2,7 +2,6 @@ import os
 import logging
 import json
 from typing import List, Dict, Optional, Union, Literal
-from dotenv import load_dotenv
 
 from llama_index.core.agent.workflow import ReActAgent
 from llama_index.core.tools import FunctionTool, QueryEngineTool
@@ -12,8 +11,6 @@ from llama_index.core.node_parser import SentenceSplitter
 from llama_index.core.query_engine import RetrieverQueryEngine
 from llama_index.core.retrievers import VectorIndexRetriever
 
-# Load environment variables
-load_dotenv()
 
 # Setup logging
 logger = logging.getLogger(__name__)
@@ -348,8 +345,11 @@ def initialize_long_context_management_agent() -> ReActAgent:
     agent = ReActAgent(
         name="long_context_management_agent",
         description=(
-            "Manages and processes long textual context. Can load text (`load_text_context`), summarize (`summarize_long_context`), "
-            "extract key info (`extract_key_information`), filter by relevance (`filter_by_relevance`), and answer questions based on the context (`query_context_index`)."
+            "Manages and processes long textual context efficiently. Handles large documents, transcripts, or datasets "
+            "by summarizing (`summarize_long_context`), extracting key information (`extract_key_information`), "
+            "filtering relevant content (`filter_by_relevance`), and answering questions based on the context (`query_context_index`). "
+            "Supports internal indexing for efficient retrieval and repeated queries. Optimized for chunked input processing "
+            "and contextual distillation. Only relies on the provided input and avoids external augmentation unless explicitly requested."
        ),
        tools=tools,
        llm=llm,
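
Note: the rewritten description enumerates the agent's tool surface. A hypothetical call sequence for orientation; the tool names come from the description above, while the argument names here are illustrative, not the real signatures:

    # Hypothetical orientation sketch; argument names are illustrative.
    summary = summarize_long_context()                           # condensed overview
    key_info = extract_key_information(query="dates mentioned")  # targeted extraction
    relevant = filter_by_relevance(query="Q3 revenue")           # prune to what matters
    answer = query_context_index(query="What was Q3 revenue?")   # indexed retrieval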
agents/math_agent.py CHANGED
@@ -1,13 +1,13 @@
 import os
 import logging
-from typing import List, Optional, Union, Dict
-from dotenv import load_dotenv
+from typing import List, Dict
 
 import sympy as sp
 import numpy as np
 import scipy.linalg as la
 import scipy.special as special
-from scipy.integrate import odeint, quad
+from llama_index.tools.code_interpreter import CodeInterpreterToolSpec
+from scipy.integrate import quad
 from scipy.stats import binom, norm, poisson
 import numpy.fft as fft
 
@@ -16,9 +16,6 @@ from llama_index.core.tools import FunctionTool
 from llama_index.llms.google_genai import GoogleGenAI
 from llama_index.tools.wolfram_alpha import WolframAlphaToolSpec
 
-# Load environment variables
-load_dotenv()
-
 # Setup logging
 logger = logging.getLogger(__name__)
 
@@ -603,6 +600,26 @@ def get_wolfram_alpha_tools() -> List[FunctionTool]:
         _wolfram_alpha_tools = []
     return _wolfram_alpha_tools
 
+
+# Use LlamaIndex's built-in Code Interpreter Tool Spec for safe execution
+# This assumes the necessary environment (e.g., docker) for the spec is available
+try:
+    code_interpreter_spec = CodeInterpreterToolSpec()
+    # Get the tool(s) from the spec. It might return multiple tools.
+    code_interpreter_tools = code_interpreter_spec.to_tool_list()
+    if not code_interpreter_tools:
+        raise RuntimeError("CodeInterpreterToolSpec did not return any tools.")
+    # Assuming the primary tool is the first one, or find by name if necessary
+    code_interpreter_tool = next((t for t in code_interpreter_tools if t.metadata.name == "code_interpreter"), None)
+    if code_interpreter_tool is None:
+        raise RuntimeError("Could not find 'code_interpreter' tool in CodeInterpreterToolSpec results.")
+    logger.info("CodeInterpreterToolSpec initialized successfully.")
+except Exception as e:
+    logger.error(f"Failed to initialize CodeInterpreterToolSpec: {e}", exc_info=True)
+    # Fallback: Define a dummy tool or raise error to prevent agent start?
+    # For now, let initialization fail if the safe interpreter isn't available.
+    raise RuntimeError("CodeInterpreterToolSpec failed to initialize. Cannot create code_agent.") from e
+
 # --- Agent Initialization ---
 
 def initialize_math_agent() -> ReActAgent:
@@ -625,7 +642,7 @@ def initialize_math_agent() -> ReActAgent:
     logger.info(f"Using agent LLM: {agent_llm_model}")
 
     # Combine Python tools and Wolfram Alpha tools
-    all_tools = get_python_math_tools() + get_wolfram_alpha_tools()
+    all_tools = get_python_math_tools() + get_wolfram_alpha_tools() + [code_interpreter_tool]
     if not all_tools:
         logger.warning("No math tools available (Python or WolframAlpha). MathAgent may be ineffective.")
 
@@ -661,7 +678,7 @@ def initialize_math_agent() -> ReActAgent:
         tools=all_tools,
         llm=llm,
         system_prompt=system_prompt,
-        can_handoff_to=["planner_agent"],
+        can_handoff_to=["planner_agent", "reasoning_agent"],
     )
     logger.info("MathAgent initialized successfully.")
     return agent
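
Note: the `code_interpreter` tool resolved above is a standard `FunctionTool`, so it can be exercised directly; a minimal sketch (the `code=` keyword matches `CodeInterpreterToolSpec.code_interpreter`; the output shape may vary by version):

    # Sketch: invoke the interpreter tool outside the agent loop as a sanity check.
    result = code_interpreter_tool.call(code="print(2**10)")
    print(result)  # ToolOutput whose content should include the captured stdout: 1024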
agents/planner_agent.py CHANGED
@@ -1,14 +1,11 @@
 import os
 import logging
 from typing import List, Dict
-from dotenv import load_dotenv
 
 from llama_index.core.agent.workflow import ReActAgent
 from llama_index.core.tools import FunctionTool
 from llama_index.llms.google_genai import GoogleGenAI
 
-# Load environment variables
-load_dotenv()
 
 # Setup logging
 logger = logging.getLogger(__name__)
@@ -48,7 +45,7 @@ def plan(objective: str) -> List[str]:
     gemini_api_key = os.getenv("GEMINI_API_KEY")
     if not gemini_api_key:
         logger.error("GEMINI_API_KEY not found for planning tool LLM.")
-        return ["Error: GEMINI_API_KEY not set for planning."]
+        return "Error: GEMINI_API_KEY not set for planning."
 
     # Prompt for the LLM to generate sub-steps
     input_prompt = (
@@ -84,22 +81,23 @@ def plan(objective: str) -> List[str]:
 
         if not sub_steps:
             logger.warning("LLM generated no sub-steps for the objective.")
-            return ["Error: Failed to generate sub-steps."]
+            return "Error: Failed to generate sub-steps."
 
         logger.info(f"Generated {len(sub_steps)} sub-steps.")
+
         return sub_steps
 
     except Exception as e:
         logger.error(f"LLM call failed during planning: {e}", exc_info=True)
-        return [f"Error during planning: {e}"]
+        return f"Error during planning: {e}"
 
-def synthesize_and_respond(results: List[Dict[str, str]]) -> str:
+def synthesize_and_report(results: List[Dict[str, str]]) -> str:
     """
     Aggregate results from sub-steps into a coherent final report using an LLM.
     Args:
         results (List[Dict[str, str]]): List of dictionaries, each with "sub_step" and "answer" keys.
     Returns:
-        str: A unified, well-structured response, or an error message.
+        str: A unified, well-structured report, or an error message.
     """
     logger.info(f"Synthesizing results from {len(results)} sub-steps...")
     if not results:
@@ -121,7 +119,9 @@ def synthesize_and_report(results: List[Dict[str, str]]) -> str:
         return "Error: GEMINI_API_KEY not set for synthesis."
 
     # Prompt for the LLM
-    input_prompt = f"""You are an expert synthesizer. Given the following sub-steps and their answers derived from an initial objective, produce a single, coherent, comprehensive final report that addresses the original objective:
+    input_prompt = f"""You are an expert synthesizer. Given the following sub-steps and their answers derived
+    from an initial objective, produce a single, coherent, comprehensive final report that
+    addresses the original objective:
 
 --- SUB-STEP RESULTS ---
 {summary_blocks.strip()}
@@ -140,10 +140,59 @@ def synthesize_and_report(results: List[Dict[str, str]]) -> str:
         logger.error(f"LLM call failed during synthesis: {e}", exc_info=True)
         return f"Error during synthesis: {e}"
 
+def answer_question(question: str) -> str:
+    """
+    Answer any question by following this strict format:
+    1. Include your chain of thought (your reasoning steps).
+    2. End your reply with the exact template:
+       FINAL ANSWER: [YOUR FINAL ANSWER]
+    YOUR FINAL ANSWER must be:
+    - A number, or
+    - As few words as possible, or
+    - A comma-separated list of numbers and/or strings.
+    Formatting rules:
+    * If asked for a number, do not use commas or units (e.g., $, %), unless explicitly requested.
+    * If asked for a string, do not include articles or abbreviations (e.g., city names), and write digits in plain text.
+    * If asked for a comma-separated list, apply the above rules to each element.
+    This tool should be invoked immediately after completing the final planning sub-step.
+    """
+    logger.info(f"Answering question: {question[:100]}")
+
+    gemini_api_key = os.getenv("GEMINI_API_KEY")
+    if not gemini_api_key:
+        logger.error("GEMINI_API_KEY not set for answer_question tool.")
+        return "Error: GEMINI_API_KEY not set."
+
+    model_name = os.getenv("ANSWER_TOOL_LLM_MODEL", "models/gemini-1.5-pro")
+
+    # Build the assistant prompt enforcing the required format
+    assistant_prompt = (
+        "You are a general AI assistant. I will ask you a question. "
+        "Report your thoughts, and finish your answer with the following template: "
+        "FINAL ANSWER: [YOUR FINAL ANSWER]. "
+        "YOUR FINAL ANSWER should be a number OR as few words as possible "
+        "OR a comma separated list of numbers and/or strings. "
+        "If you are asked for a number, don't use commas for thousands or any units like $ or % unless specified. "
+        "If you are asked for a string, omit articles and abbreviations, and write digits in plain text. "
+        "If you are asked for a comma separated list, apply these rules to each element.\n\n"
+        f"Question: {question}\n"
+        "Answer:"
+    )
+
+    try:
+        llm = GoogleGenAI(api_key=gemini_api_key, model=model_name)
+        logger.info(f"Using answer LLM: {model_name}")
+        response = llm.complete(assistant_prompt)
+        logger.info("Answer generated successfully.")
+        return response.text
+    except Exception as e:
+        logger.error(f"LLM call failed during answer generation: {e}", exc_info=True)
+        return f"Error during answer generation: {e}"
+
 # --- Tool Definitions ---
 synthesize_tool = FunctionTool.from_defaults(
-    fn=synthesize_and_respond,
-    name="synthesize_and_respond",
+    fn=synthesize_and_report,
+    name="synthesize_and_report",
     description=(
         "Aggregates results from multiple sub-steps into a final coherent report. "
         "Input: results (List[Dict[str, str]]) where each dict has \"sub_step\" and \"answer\". "
@@ -160,6 +209,15 @@ generate_substeps_tool = FunctionTool.from_defaults(
     )
 )
 
+answer_question = FunctionTool.from_defaults(
+    fn=answer_question,
+    name="answer_question",
+    description=(
+        "Answers any question and returns the full text, "
+        "always ending with 'FINAL ANSWER: ...' in accordance with the rules."
+    ),
+)
+
 # --- Agent Initialization ---
 def initialize_planner_agent() -> ReActAgent:
     """Initializes the Planner Agent."""
@@ -185,7 +243,7 @@ def initialize_planner_agent() -> ReActAgent:
         logger.warning("Using default/fallback system prompt for PlannerAgent.")
 
     # Define available tools
-    tools = [generate_substeps_tool, synthesize_tool]
+    tools = [generate_substeps_tool, synthesize_tool, answer_question]
 
     # Define valid handoff targets
     valid_handoffs = [
@@ -196,7 +254,11 @@ def initialize_planner_agent() -> ReActAgent:
         "image_analyzer_agent",
         "text_analyzer_agent",
         "verifier_agent",
-        "reasoning_agent"
+        "reasoning_agent",
+        "figure_interpretation_agent",
+        "long_context_management_agent",
+        "advanced_validation_agent",
+        "video_analyzer_agent"
     ]
 
     agent = ReActAgent(
@@ -204,7 +266,7 @@ def initialize_planner_agent() -> ReActAgent:
         description=(
             "Strategically plans tasks by breaking down objectives into sub-steps using `generate_substeps`. "
             "Orchestrates execution by handing off sub-steps to specialized agents. "
-            "Synthesizes final results using `synthesize_and_respond`."
+            "Synthesizes final results using `synthesize_and_report`."
        ),
        tools=tools,
        llm=llm,
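
Note: with `answer_question` added to the planner, reasoning, and research agents alike, every final reply is expected to end with the same `FINAL ANSWER:` template (the format scored by the GAIA benchmark API configured in .env), so a caller can recover the short-form answer with a simple pattern. A minimal sketch; the helper name is ours, not part of the commit:

    import re

    def extract_final_answer(text: str) -> str:
        """Return what follows the last 'FINAL ANSWER:' marker, or the full text."""
        matches = re.findall(r"FINAL ANSWER:\s*(.+)", text)
        return matches[-1].strip() if matches else text.strip()

    print(extract_final_answer("Reasoning...\nFINAL ANSWER: 42"))  # -> 42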
agents/reasoning_agent.py CHANGED
@@ -1,15 +1,11 @@
 import os
 import logging
-from dotenv import load_dotenv
 
 from llama_index.core.agent.workflow import ReActAgent
 from llama_index.core.tools import FunctionTool
 from llama_index.llms.google_genai import GoogleGenAI
 from llama_index.llms.openai import OpenAI
 
-# Load environment variables
-load_dotenv()
-
 # Setup logging
 logger = logging.getLogger(__name__)
 
@@ -45,7 +41,7 @@ def reasoning_tool_fn(context: str) -> str:
 
     # Configuration for the reasoning LLM (OpenAI in the original)
     reasoning_llm_model = os.getenv("REASONING_LLM_MODEL", "gpt-4o-mini") # Use gpt-4o-mini as default
-    openai_api_key = os.getenv("ALPAFLOW_OPENAI_API_KEY") # Specific key from original code
+    openai_api_key = os.getenv("OPENAI_API_KEY")
 
     if not openai_api_key:
         logger.error("ALPAFLOW_OPENAI_API_KEY not found for reasoning tool LLM.")
@@ -75,7 +71,9 @@ def reasoning_tool_fn(context: str) -> str:
         llm = OpenAI(
             model=reasoning_llm_model,
             api_key=openai_api_key,
-            # reasoning_effort="high" # Add if needed and supported by the specific OpenAI integration
+            reasoning_effort="high",
+            temperature=0.25,
+            max_tokens=16384
         )
         logger.info(f"Using reasoning LLM: {reasoning_llm_model}")
         response = llm.complete(reasoning_prompt)
@@ -85,6 +83,57 @@ def reasoning_tool_fn(context: str) -> str:
         logger.error(f"Error during reasoning tool LLM call: {e}", exc_info=True)
         return f"Error during reasoning: {e}"
 
+
+def answer_question(question: str) -> str:
+    """
+    Answer any question by following this strict format:
+    1. Include your chain of thought (your reasoning steps).
+    2. End your reply with the exact template:
+       FINAL ANSWER: [YOUR FINAL ANSWER]
+    YOUR FINAL ANSWER must be:
+    - A number, or
+    - As few words as possible, or
+    - A comma-separated list of numbers and/or strings.
+    Formatting rules:
+    * If asked for a number, do not use commas or units (e.g., $, %), unless explicitly requested.
+    * If asked for a string, do not include articles or abbreviations (e.g., city names), and write digits in plain text.
+    * If asked for a comma-separated list, apply the above rules to each element.
+    This tool should be invoked immediately after completing the final planning sub-step.
+    """
+    logger.info(f"Answering question: {question[:100]}")
+
+    gemini_api_key = os.getenv("GEMINI_API_KEY")
+    if not gemini_api_key:
+        logger.error("GEMINI_API_KEY not set for answer_question tool.")
+        return "Error: GEMINI_API_KEY not set."
+
+    model_name = os.getenv("ANSWER_TOOL_LLM_MODEL", "models/gemini-1.5-pro")
+
+    # Build the assistant prompt enforcing the required format
+    assistant_prompt = (
+        "You are a general AI assistant. I will ask you a question. "
+        "Report your thoughts, and finish your answer with the following template: "
+        "FINAL ANSWER: [YOUR FINAL ANSWER]. "
+        "YOUR FINAL ANSWER should be a number OR as few words as possible "
+        "OR a comma separated list of numbers and/or strings. "
+        "If you are asked for a number, don't use commas for thousands or any units like $ or % unless specified. "
+        "If you are asked for a string, omit articles and abbreviations, and write digits in plain text. "
+        "If you are asked for a comma separated list, apply these rules to each element.\n\n"
+        f"Question: {question}\n"
+        "Answer:"
+    )
+
+    try:
+        llm = GoogleGenAI(api_key=gemini_api_key, model=model_name)
+        logger.info(f"Using answer LLM: {model_name}")
+        response = llm.complete(assistant_prompt)
+        logger.info("Answer generated successfully.")
+        return response.text
+    except Exception as e:
+        logger.error(f"LLM call failed during answer generation: {e}", exc_info=True)
+        return f"Error during answer generation: {e}"
+
+
 # --- Tool Definition ---
 reasoning_tool = FunctionTool.from_defaults(
     fn=reasoning_tool_fn,
@@ -95,6 +144,15 @@ reasoning_tool = FunctionTool.from_defaults(
     ),
 )
 
+answer_question = FunctionTool.from_defaults(
+    fn=answer_question,
+    name="answer_question",
+    description=(
+        "Use this tool to answer any question, reporting your reasoning steps and ending with 'FINAL ANSWER: ...'. "
+        "Invoke this tool immediately after the final sub-step of planning is complete."
+    ),
+)
+
 # --- Agent Initialization ---
 def initialize_reasoning_agent() -> ReActAgent:
     """Initializes the Reasoning Agent."""
@@ -122,15 +180,17 @@ def initialize_reasoning_agent() -> ReActAgent:
         agent = ReActAgent(
             name="reasoning_agent",
             description=(
-                "A pure reasoning agent that uses the `reasoning_tool` for detailed chain-of-thought analysis "
-                "on the provided context, then hands off the result to the `planner_agent`."
+                "An autonomous reasoning specialist that applies `reasoning_tool` to perform "
+                "in-depth chain-of-thought analysis on incoming queries or contexts, "
+                "then seamlessly delegates the synthesized insights to `planner_agent` "
+                "or `long_context_management_agent` for subsequent task orchestration."
             ),
-            tools=[reasoning_tool], # Only has access to the reasoning tool
+            tools=[reasoning_tool, answer_question],
             llm=llm,
             system_prompt=system_prompt,
-            can_handoff_to=["planner_agent"],
+            can_handoff_to=["planner_agent", "long_context_management_agent", "advanced_validation_agent", "code_agent"],
         )
-        logger.info("ReasoningAgent initialized successfully.")
+
        return agent
 
    except Exception as e:
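
Note on a subtlety repeated in the planner, reasoning, and research diffs: `answer_question = FunctionTool.from_defaults(fn=answer_question, ...)` rebinds the module-level name. This still works because the right-hand side evaluates first, capturing the plain function in `fn=` before the name comes to refer to the `FunctionTool`. A minimal sketch of the same pattern:

    from llama_index.core.tools import FunctionTool

    def answer(question: str) -> str:
        return f"FINAL ANSWER: {question}"

    # RHS evaluates before rebinding, so the tool wraps the real function even
    # though `answer` now names the FunctionTool instead of the function.
    answer = FunctionTool.from_defaults(fn=answer, name="answer")
    print(answer.call(question="42"))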
agents/research_agent.py CHANGED
@@ -3,7 +3,6 @@ import time
3
  import logging
4
  import re # Import regex for video ID extraction
5
  from typing import List, Optional, Dict # Added Dict
6
- from dotenv import load_dotenv
7
 
8
  from llama_index.core.agent.workflow import ReActAgent
9
  from llama_index.core.tools import FunctionTool
@@ -27,89 +26,10 @@ except ImportError:
27
  logging.warning("Selenium or Helium not installed. Browser interaction tools will be unavailable.")
28
  SELENIUM_AVAILABLE = False
29
 
30
- # Attempt to import YouTube transcript API
31
- try:
32
- from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
33
- YOUTUBE_TRANSCRIPT_API_AVAILABLE = True
34
- except ImportError:
35
- logging.warning("youtube-transcript-api not installed. YouTube transcript tool will be unavailable.")
36
- YOUTUBE_TRANSCRIPT_API_AVAILABLE = False
37
-
38
- # Load environment variables
39
- load_dotenv()
40
 
41
  # Setup logging
42
  logger = logging.getLogger(__name__)
43
 
44
- # --- Helper function to extract YouTube Video ID ---
45
- def extract_video_id(url: str) -> Optional[str]:
46
- """Extracts the YouTube video ID from various URL formats."""
47
- # Standard watch URL: https://www.youtube.com/watch?v=VIDEO_ID
48
- match = re.search(r'(?:v=|/v/|embed/|youtu\.be/|/shorts/)([A-Za-z0-9_-]+)', url)
49
- if match:
50
- return match.group(1)
51
- return None
52
-
53
- # --- YouTube Transcript Tool ---
54
- def get_youtube_transcript(video_url_or_id: str, languages=None) -> str:
55
- """Fetches the transcript for a YouTube video using its URL or video ID.
56
- Specify preferred languages as a list (e.g., ["en", "es"]).
57
- Returns the transcript text or an error message.
58
- """
59
- if languages is None:
60
- languages = ["en"]
61
- if not YOUTUBE_TRANSCRIPT_API_AVAILABLE:
62
- return "Error: youtube-transcript-api library is required but not installed."
63
-
64
- logger.info(f"Attempting to fetch YouTube transcript for: {video_url_or_id}")
65
- video_id = extract_video_id(video_url_or_id)
66
- if not video_id:
67
- # Assume it might be an ID already if extraction fails
68
- if re.match(r"^[a-zA-Z0-9_\-]+$", video_url_or_id):
69
- video_id = video_url_or_id
70
- logger.info("Input treated as video ID.")
71
- else:
72
- logger.error(f"Could not extract valid YouTube video ID from: {video_url_or_id}")
73
- return f"Error: Invalid YouTube URL or Video ID format: {video_url_or_id}"
74
-
75
- try:
76
- # Fetch available transcripts
77
- api = YouTubeTranscriptApi()
78
- transcript_list = api.list(video_id)
79
-
80
- # Try to find a transcript in the specified languages
81
- transcript = transcript_list.find_transcript(languages)
82
-
83
- # Fetch the actual transcript data (list of dicts)
84
- transcript_data = transcript.fetch()
85
-
86
- # Combine the text parts into a single string
87
- full_transcript = " ".join(snippet.text for snippet in transcript_data)
88
-
89
- full_transcript = " ".join(snippet.text for snippet in transcript_data)
90
- logger.info(f"Successfully fetched transcript for video ID {video_id} in language {transcript.language}.")
91
- return full_transcript
92
-
93
- except TranscriptsDisabled:
94
- logger.warning(f"Transcripts are disabled for video ID: {video_id}")
95
- return f"Error: Transcripts are disabled for this video (ID: {video_id})."
96
- except NoTranscriptFound as e:
97
- logger.warning(f"No transcript found for video ID {video_id} in languages {languages}. Available: {e.available_transcripts}")
98
- # Try fetching any available transcript if specific languages failed
99
- try:
100
- logger.info(f"Attempting to fetch any available transcript for {video_id}")
101
- any_transcript = transcript_list.find_generated_transcript(transcript_list.manually_created_transcripts.keys() or transcript_list.generated_transcripts.keys())
102
- any_transcript_data = any_transcript.fetch()
103
- full_transcript = " ".join([item["text"] for item in any_transcript_data])
104
- logger.info(f"Successfully fetched fallback transcript for video ID {video_id} in language {any_transcript.language}.")
105
- return full_transcript
106
- except Exception as fallback_e:
107
- logger.error(f"Could not find any transcript for video ID {video_id}. Original error: {e}. Fallback error: {fallback_e}")
108
- return f"Error: No transcript found for video ID {video_id} in languages {languages} or any fallback language."
109
- except Exception as e:
110
- logger.error(f"Unexpected error fetching transcript for video ID {video_id}: {e}", exc_info=True)
111
- return f"Error fetching transcript: {e}"
112
-
113
  # --- Browser Interaction Tools (Conditional on Selenium/Helium availability) ---
114
 
115
  # Global browser instance (managed by initializer)
@@ -286,7 +206,55 @@ def close_popups() -> str:
286
  time.sleep(0.5)
287
  return "Sent ESC key press."
288
 
289
- # --- Search Engine & Data Source Tools ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
 
291
  # --- Agent Initializer Class ---
292
  class ResearchAgentInitializer:
@@ -296,7 +264,6 @@ class ResearchAgentInitializer:
296
  self.browser_tools = []
297
  self.search_tools = []
298
  self.datasource_tools = []
299
- self.youtube_tool = None # Added for YouTube tool
300
 
301
  # Initialize LLM
302
  self._initialize_llm()
@@ -311,7 +278,15 @@ class ResearchAgentInitializer:
311
  # Initialize Search/Datasource Tools
312
  self._create_search_tools()
313
  self._create_datasource_tools()
314
- self._create_youtube_tool() # Added
 
 
 
 
 
 
 
 
315
 
316
  logger.info("ResearchAgent resources initialized.")
317
 
@@ -366,7 +341,7 @@ class ResearchAgentInitializer:
366
  self.browser_tools = [
367
  FunctionTool.from_defaults(fn=visit, name="visit_url"), # Renamed for clarity
368
  FunctionTool.from_defaults(fn=get_text_by_css, name="get_text_by_css"),
369
- FunctionTool.from_defaults(fn=get_page_html, name="get_page_html"),
370
  FunctionTool.from_defaults(fn=click_element_by_css, name="click_element_by_css"),
371
  FunctionTool.from_defaults(fn=input_text_by_css, name="input_text_by_css"),
372
  FunctionTool.from_defaults(fn=scroll_page, name="scroll_page"),
@@ -444,28 +419,14 @@ class ResearchAgentInitializer:
444
 
445
  logger.info(f"Created {len(self.datasource_tools)} specific data source tools.")
446
 
447
- def _create_youtube_tool(self): # Added method
448
- if YOUTUBE_TRANSCRIPT_API_AVAILABLE:
449
- self.youtube_tool = FunctionTool.from_defaults(
450
- fn=get_youtube_transcript,
451
- name="get_youtube_transcript",
452
- description=(
453
- "(YouTube) Fetches the transcript text for a given YouTube video URL or video ID. "
454
- "Specify preferred languages (e.g., [\"en\", \"es\"]). Returns transcript or error."
455
- )
456
- )
457
- logger.info("Created YouTube transcript tool.")
458
- else:
459
- self.youtube_tool = None
460
- logger.warning("YouTube transcript tool disabled because youtube-transcript-api is not installed.")
461
 
462
  def get_agent(self) -> ReActAgent:
463
  """Creates and returns the configured ReActAgent for research."""
464
  logger.info("Creating ResearchAgent ReActAgent instance...")
465
 
466
  all_tools = self.browser_tools + self.search_tools + self.datasource_tools
467
- if self.youtube_tool: # Add YouTube tool if available
468
- all_tools.append(self.youtube_tool)
469
 
470
  if not all_tools:
471
  logger.warning("No tools available for ResearchAgent. It will likely be unable to function.")
@@ -474,29 +435,43 @@ class ResearchAgentInitializer:
474
  # Updated prompt to include YouTube tool
475
  system_prompt = """\
476
  You are ResearchAgent, an autonomous web research assistant. Your goal is to gather information accurately and efficiently using the available tools.
477
-
478
  Available Tool Categories:
479
  - (Browser): Tools for direct web page interaction (visiting URLs, clicking, scrolling, extracting text/HTML, inputting text).
480
  - (Search): Tools for querying search engines (Google, DuckDuckGo, Tavily).
481
  - (Wikipedia): Tools for searching and loading Wikipedia pages.
482
  - (YahooFinance): Tools for retrieving financial data (balance sheets, income statements, stock info, news).
483
  - (ArXiv): Tool for searching academic papers on ArXiv.
484
- - (YouTube): Tool for fetching video transcripts (`get_youtube_transcript`).
485
-
486
- Workflow:
487
- 1. **Thought**: Analyze the research goal. Break it down if necessary. Choose the *single best tool* for the *next immediate step*. Explain your choice. Consider the information needed and which tool provides it most directly (e.g., use YahooFinance for stock prices, Google/DDG for general web search, Tavily for document search, ArXiv for papers, Wikipedia for encyclopedic info, YouTube for video transcripts, Browser tools for specific website interaction).
488
- 2. **Action**: Call the chosen tool with the correct arguments. Ensure inputs match the tool's requirements (e.g., URL or video ID for YouTube).
 
 
 
 
 
 
 
 
 
 
 
 
 
489
  3. **Observation**: Examine the tool's output. Extract the relevant information. Check for errors.
490
- 4. **Reflect & Iterate**: Does the observation satisfy the immediate goal? Do you have enough information for the overall research task? If not, return to step 1 (Thought) to plan the *next* single step. If a tool failed, consider why and try an alternative tool or approach.
491
- 5. **Synthesize**: Once all necessary information is gathered, synthesize the findings into a coherent answer to the original research goal.
492
- 6. **Hand-Off**: Pass the synthesized findings to the appropriate next agent: **code_agent** (for coding), **math_agent** (for math), **text_analyzer_agent** (for text analysis), **planner_agent** (for planning/synthesis), or **reasoning_agent** (for logic/reasoning).
493
-
494
- Constraints:
 
495
  - Use only one tool per Action step.
496
  - Think step-by-step.
497
  - If using browser tools, start with `visit_url`.
498
- - Be mindful of potential errors and try alternative tools if one fails.
499
- - Synthesize results *before* handing off.
500
  """
501
 
502
  agent = ReActAgent(
@@ -512,6 +487,8 @@ class ResearchAgentInitializer:
512
  "code_agent",
513
  "math_agent",
514
  "text_analyzer_agent", # Added based on original prompt
 
 
515
  "planner_agent",
516
  "reasoning_agent"
517
  ],
@@ -576,47 +553,5 @@ if __name__ == "__main__":
576
  missing_optional = [key for key in optional_keys if not os.getenv(key)]
577
  if missing_optional:
578
  print(f"Warning: Optional environment variable(s) not set: {', '.join(missing_optional)}. Some tools may be unavailable.")
579
-
580
- test_agent = None
581
- try:
582
- # Test YouTube transcript tool directly
583
- if YOUTUBE_TRANSCRIPT_API_AVAILABLE:
584
- print("\nTesting YouTube transcript tool...")
585
- # Example video: "Attention is All You Need" paper explanation
586
- yt_url = "https://www.youtube.com/watch?v=TQQlZhbC5ps"
587
- transcript = get_youtube_transcript(yt_url)
588
- if not transcript.startswith("Error:"):
589
- print(f"Transcript fetched (first 500 chars):\n{transcript[:500]}...")
590
- else:
591
- print(f"YouTube Transcript Fetch Failed: {transcript}")
592
- else:
593
- print("\nSkipping YouTube transcript test as youtube-transcript-api is not available.")
594
-
595
- # Initialize agent AFTER testing standalone functions
596
- test_agent = initialize_research_agent()
597
- print("\nResearch Agent initialized successfully for testing.")
598
-
599
- # Example test (requires browser tools to be available)
600
- # if SELENIUM_AVAILABLE:
601
- # print("\nTesting browser visit...")
602
- # result = test_agent.chat("Visit https://example.com and tell me the main heading text using CSS selector 'h1'")
603
- # print(f"Test query result: {result}")
604
- # else:
605
- # print("\nSkipping browser test as Selenium/Helium are not available.")
606
-
607
- # Example search test (requires GOOGLE keys)
608
- # if os.getenv("GOOGLE_API_KEY") and os.getenv("GOOGLE_CSE_ID"):
609
- # print("\nTesting Google Search...")
610
- # result_search = test_agent.chat("Search for 'LlamaIndex Agent Workflow'")
611
- # print(f"Search test result: {result_search}")
612
- # else:
613
- # print("\nSkipping Google Search test as API keys are not set.")
614
-
615
- except Exception as e:
616
- print(f"Error during testing: {e}")
617
- finally:
618
- # Clean up browser if it was started
619
- if test_agent:
620
- print("\nCleaning up resources...")
621
- cleanup_research_agent_resources()
622
 
 
3
  import logging
4
  import re # Import regex for video ID extraction
5
  from typing import List, Optional, Dict # Added Dict
 
6
 
7
  from llama_index.core.agent.workflow import ReActAgent
8
  from llama_index.core.tools import FunctionTool
 
26
  logging.warning("Selenium or Helium not installed. Browser interaction tools will be unavailable.")
27
  SELENIUM_AVAILABLE = False
28
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  # Setup logging
31
  logger = logging.getLogger(__name__)
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  # --- Browser Interaction Tools (Conditional on Selenium/Helium availability) ---
34
 
35
  # Global browser instance (managed by initializer)
 
206
  time.sleep(0.5)
207
  return "Sent ESC key press."
208
 
209
+ def answer_question(question: str) -> str:
210
+ """
211
+ Answer any question by following this strict format:
212
+ 1. Include your chain of thought (your reasoning steps).
213
+ 2. End your reply with the exact template:
214
+ FINAL ANSWER: [YOUR FINAL ANSWER]
215
+ YOUR FINAL ANSWER must be:
216
+ - A number, or
217
+ - As few words as possible, or
218
+ - A comma-separated list of numbers and/or strings.
219
+ Formatting rules:
220
+ * If asked for a number, do not use commas or units (e.g., $, %), unless explicitly requested.
221
+ * If asked for a string, do not include articles or abbreviations (e.g., city names), and write digits in plain text.
222
+ * If asked for a comma-separated list, apply the above rules to each element.
223
+ This tool should be invoked immediately after completing the final planning sub-step.
224
+ """
225
+ logger.info(f"Answering question: {question[:100]}")
226
+
227
+ gemini_api_key = os.getenv("GEMINI_API_KEY")
228
+ if not gemini_api_key:
229
+ logger.error("GEMINI_API_KEY not set for answer_question tool.")
230
+ return "Error: GEMINI_API_KEY not set."
231
+
232
+ model_name = os.getenv("ANSWER_TOOL_LLM_MODEL", "models/gemini-1.5-pro")
233
+
234
+ # Build the assistant prompt enforcing the required format
235
+ assistant_prompt = (
236
+ "You are a general AI assistant. I will ask you a question. "
237
+ "Report your thoughts, and finish your answer with the following template: "
238
+ "FINAL ANSWER: [YOUR FINAL ANSWER]. "
239
+ "YOUR FINAL ANSWER should be a number OR as few words as possible "
240
+ "OR a comma separated list of numbers and/or strings. "
241
+ "If you are asked for a number, don't use commas for thousands or any units like $ or % unless specified. "
242
+ "If you are asked for a string, omit articles and abbreviations, and write digits in plain text. "
243
+ "If you are asked for a comma separated list, apply these rules to each element.\n\n"
244
+ f"Question: {question}\n"
245
+ "Answer:"
246
+ )
247
+
248
+ try:
249
+ llm = GoogleGenAI(api_key=gemini_api_key, model=model_name)
250
+ logger.info(f"Using answer LLM: {model_name}")
251
+ response = llm.complete(assistant_prompt)
252
+ logger.info("Answer generated successfully.")
253
+ return response.text
254
+ except Exception as e:
255
+ logger.error(f"LLM call failed during answer generation: {e}", exc_info=True)
256
+ return f"Error during answer generation: {e}"
257
+
258
 
259
  # --- Agent Initializer Class ---
260
  class ResearchAgentInitializer:
 
264
  self.browser_tools = []
265
  self.search_tools = []
266
  self.datasource_tools = []
 
267
 
268
  # Initialize LLM
269
  self._initialize_llm()
 
278
  # Initialize Search/Datasource Tools
279
  self._create_search_tools()
280
  self._create_datasource_tools()
281
+
282
+ self.answer_question = FunctionTool.from_defaults(
283
+ fn=answer_question,
284
+ name="answer_question",
285
+ description=(
286
+ "Use this tool to answer any question, reporting your reasoning steps and ending with 'FINAL ANSWER: ...'. "
287
+ "Invoke this tool immediately after the final sub-step of planning is complete."
288
+ ),
289
+ )
290
 
291
  logger.info("ResearchAgent resources initialized.")
292
 
 
341
  self.browser_tools = [
342
  FunctionTool.from_defaults(fn=visit, name="visit_url"), # Renamed for clarity
343
  FunctionTool.from_defaults(fn=get_text_by_css, name="get_text_by_css"),
344
+ # FunctionTool.from_defaults(fn=get_page_html, name="get_page_html"),
345
  FunctionTool.from_defaults(fn=click_element_by_css, name="click_element_by_css"),
346
  FunctionTool.from_defaults(fn=input_text_by_css, name="input_text_by_css"),
347
  FunctionTool.from_defaults(fn=scroll_page, name="scroll_page"),
 
419
 
420
  logger.info(f"Created {len(self.datasource_tools)} specific data source tools.")
421
 
422
+
 
 
423
 
424
  def get_agent(self) -> ReActAgent:
425
  """Creates and returns the configured ReActAgent for research."""
426
  logger.info("Creating ResearchAgent ReActAgent instance...")
427
 
428
  all_tools = self.browser_tools + self.search_tools + self.datasource_tools
429
+ all_tools.append(self.answer_question)
 
430
 
431
  if not all_tools:
432
  logger.warning("No tools available for ResearchAgent. It will likely be unable to function.")
 
435
  # Updated prompt to include YouTube tool
436
  system_prompt = """\
437
  You are ResearchAgent, an autonomous web research assistant. Your goal is to gather information accurately and efficiently using the available tools.
438
+
439
  Available Tool Categories:
440
  - (Browser): Tools for direct web page interaction (visiting URLs, clicking, scrolling, extracting text/HTML, inputting text).
441
  - (Search): Tools for querying search engines (Google, DuckDuckGo, Tavily).
442
  - (Wikipedia): Tools for searching and loading Wikipedia pages.
443
  - (YahooFinance): Tools for retrieving financial data (balance sheets, income statements, stock info, news).
444
  - (ArXiv): Tool for searching academic papers on ArXiv.
445
+ - (Answer): `answer_question`: use this when your research has yielded a definitive result and you need to reply in the strict “FINAL ANSWER” format.
446
+
447
+ **Answer Tool Usage**
448
+ When you know the final answer and no further data is required, invoke `answer_question` with the user’s query. It will return text ending with:
449
+
450
+ FINAL ANSWER: [YOUR FINAL ANSWER]
451
+
452
+ Formatting rules for **YOUR FINAL ANSWER**:
453
+ - A single number, or
454
+ - As few words as possible, or
455
+ - A comma-separated list of numbers and/or strings.
456
+ - If numeric: no thousands separators or units (%, $, etc.) unless explicitly requested.
457
+ - If string: omit articles and abbreviations; write digits in plain text.
458
+ - If a list: apply the above rules to each element.
459
+
460
+ **Workflow:**
461
+ 1. **Thought**: Analyze the research goal. Break it down if necessary. Choose the *single best tool* for the *next immediate step*. Explain your choice.
462
+ 2. **Action**: Call the chosen tool with the correct arguments. Ensure inputs match the tool's requirements.
463
  3. **Observation**: Examine the tool's output. Extract the relevant information. Check for errors.
464
+ 4. **Reflect & Iterate**: Does the observation satisfy the immediate goal? If not, return to step 1. If a tool failed, try an alternative approach.
465
+ 5. **Advanced Validation**: Before delivering any final response, invoke `advanced_validation_agent` with the combined insights from the reasoning and planning phases. If validation fails, pass the feedback back into **planner_agent** to refine the approach and repeat validation.
466
+ 6. **Synthesize**: Once validation is approved, synthesize all gathered information into a coherent answer.
467
+ 7. **Respond**: Invoke `answer_question` to emit the **FINAL ANSWER** according to the strict template rules.
468
+
469
+ **Constraints:**
470
  - Use only one tool per Action step.
471
  - Think step-by-step.
472
  - If using browser tools, start with `visit_url`.
473
+ - Synthesize results *before* handing off or responding.
474
+ - Do not skip any workflow step (reason → action → observation → reflect → validate → synthesize → respond).
475
  """
476
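For illustration, a hypothetical single pass through this loop (tool names and values are placeholders and may not match the registered tools exactly):

    Thought: The question asks for the year arXiv was founded; a search engine query is the best next step.
    Action: search(query="arxiv founding year")
    Observation: Multiple results state that arXiv started in 1991.
    Thought: The fact is confirmed; validate, synthesize, then respond.
    Action: answer_question(question="In what year was arXiv founded?")
    Observation: "... FINAL ANSWER: 1991"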
 
477
  agent = ReActAgent(
 
487
  "code_agent",
488
  "math_agent",
489
  "text_analyzer_agent", # Added based on original prompt
490
+ "advanced_validation_agent",
491
+ "long_context_management_agent"
492
  "planner_agent",
493
  "reasoning_agent"
494
  ],
 
553
  missing_optional = [key for key in optional_keys if not os.getenv(key)]
554
  if missing_optional:
555
  print(f"Warning: Optional environment variable(s) not set: {', '.join(missing_optional)}. Some tools may be unavailable.")
556
+
 
 
557
 
agents/role_agent.py CHANGED
@@ -1,6 +1,5 @@
1
  import os
2
  import logging
3
- from dotenv import load_dotenv
4
 
5
  import datasets
6
  from llama_index.core import Document, VectorStoreIndex
@@ -14,8 +13,6 @@ from llama_index.core.postprocessor import SentenceTransformerRerank
14
  from llama_index.llms.google_genai import GoogleGenAI
15
  from llama_index.retrievers.bm25 import BM25Retriever
16
 
17
- # Load environment variables
18
- load_dotenv()
19
 
20
  # Setup logging
21
  logger = logging.getLogger(__name__)
 
1
  import os
2
  import logging
 
3
 
4
  import datasets
5
  from llama_index.core import Document, VectorStoreIndex
 
13
  from llama_index.llms.google_genai import GoogleGenAI
14
  from llama_index.retrievers.bm25 import BM25Retriever
15
 
 
 
16
 
17
  # Setup logging
18
  logger = logging.getLogger(__name__)
agents/text_analyzer_agent.py CHANGED
@@ -3,7 +3,6 @@ import certifi
3
  import logging
4
  import subprocess # For calling ffmpeg if needed
5
  from typing import List, Dict, Optional
6
- from dotenv import load_dotenv
7
 
8
  from llama_index.core.agent.workflow import ReActAgent
9
  from llama_index.core.tools import FunctionTool
@@ -19,8 +18,6 @@ except ImportError:
19
  logging.warning("openai-whisper not installed. Audio transcription tool will be unavailable.")
20
  WHISPER_AVAILABLE = False
21
 
22
- # Load environment variables
23
- load_dotenv()
24
 
25
  # Setup logging
26
  logger = logging.getLogger(__name__)
@@ -325,7 +322,7 @@ def initialize_text_analyzer_agent() -> ReActAgent:
325
  tools=tools,
326
  llm=llm,
327
  system_prompt=system_prompt,
328
- can_handoff_to=["planner_agent", "research_agent", "reasoning_agent"], # Example handoffs
329
  )
330
  logger.info("TextAnalyzerAgent initialized successfully.")
331
  return agent
 
3
  import logging
4
  import subprocess # For calling ffmpeg if needed
5
  from typing import List, Dict, Optional
 
6
 
7
  from llama_index.core.agent.workflow import ReActAgent
8
  from llama_index.core.tools import FunctionTool
 
18
  logging.warning("openai-whisper not installed. Audio transcription tool will be unavailable.")
19
  WHISPER_AVAILABLE = False
20
 
 
 
21
 
22
  # Setup logging
23
  logger = logging.getLogger(__name__)
 
322
  tools=tools,
323
  llm=llm,
324
  system_prompt=system_prompt,
325
+ can_handoff_to=["planner_agent", "research_agent", "reasoning_agent", "verifier_agent", "advanced_validation_agent"], # Example handoffs
326
  )
327
  logger.info("TextAnalyzerAgent initialized successfully.")
328
  return agent
agents/verifier_agent.py CHANGED
@@ -2,15 +2,11 @@ import os
2
  import logging
3
  import re
4
  from typing import List
5
- from dotenv import load_dotenv
6
 
7
  from llama_index.core.agent.workflow import FunctionAgent, ReActAgent
8
  from llama_index.core.tools import FunctionTool
9
  from llama_index.llms.google_genai import GoogleGenAI
10
 
11
- # Load environment variables
12
- load_dotenv()
13
-
14
  # Setup logging
15
  logger = logging.getLogger(__name__)
16
 
@@ -246,7 +242,7 @@ class VerifierInitializer:
246
  ],
247
  llm=self.verifier.agent_llm, # Use the agent LLM from the Verifier instance
248
  system_prompt=system_prompt,
249
- can_handoff_to=["reasoning_agent", "planner_agent"],
250
  )
251
  logger.info("VerifierAgent FunctionAgent instance created.")
252
  return agent
 
2
  import logging
3
  import re
4
  from typing import List
 
5
 
6
  from llama_index.core.agent.workflow import FunctionAgent, ReActAgent
7
  from llama_index.core.tools import FunctionTool
8
  from llama_index.llms.google_genai import GoogleGenAI
9
 
 
 
 
10
  # Setup logging
11
  logger = logging.getLogger(__name__)
12
 
 
242
  ],
243
  llm=self.verifier.agent_llm, # Use the agent LLM from the Verifier instance
244
  system_prompt=system_prompt,
245
+ can_handoff_to=["reasoning_agent", "planner_agent", "advanced_validation_agent"],
246
  )
247
  logger.info("VerifierAgent FunctionAgent instance created.")
248
  return agent
agents/video_analyzer_agent.py ADDED
@@ -0,0 +1,334 @@
 
 
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import os
5
+ import re
6
+ import shutil
7
+ from pathlib import Path
8
+ from typing import Optional
9
+
10
+ import cv2
11
+ import yt_dlp
12
+ from llama_index.core.agent.workflow import FunctionAgent
13
+ from llama_index.core.base.llms.types import TextBlock, ImageBlock, ChatMessage
14
+ from llama_index.core.tools import FunctionTool
15
+ from llama_index.llms.google_genai import GoogleGenAI
16
+ from tqdm import tqdm
17
+ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
18
+
19
+ # ---------------------------------------------------------------------------
20
+ # Environment setup & logging
21
+ # ---------------------------------------------------------------------------
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
+ # ---------------------------------------------------------------------------
26
+ # Prompt loader
27
+ # ---------------------------------------------------------------------------
28
+
29
+ def load_prompt_from_file(filename: str = "../prompts/video_analyzer_prompt.txt") -> str:
30
+ """Load the system prompt for video analysis from *filename*.
31
+
32
+ Falls back to a minimal prompt if the file cannot be read.
33
+ """
34
+ script_dir = Path(__file__).parent
35
+ prompt_path = (script_dir / filename).resolve()
36
+
37
+ try:
38
+ with prompt_path.open("r", encoding="utf-8") as fp:
39
+ prompt = fp.read()
40
+ logger.info("Successfully loaded system prompt from %s", prompt_path)
41
+ return prompt
42
+ except FileNotFoundError:
43
+ logger.error(
44
+ "Prompt file %s not found. Using fallback prompt.", prompt_path
45
+ )
46
+ except Exception as exc: # pylint: disable=broad-except
47
+ logger.error(
48
+ "Error loading prompt file %s: %s", prompt_path, exc, exc_info=True
49
+ )
50
+
51
+ # Fallback – keep it extremely short to save tokens
52
+ return (
53
+ "You are a video analyzer. Provide a factual, chronological "
54
+ "description of the video, identify key events, and summarise insights."
55
+ )
56
+
57
+
58
+ def extract_frames(video_path, output_dir, fps=1/2):
59
+ """
60
+ Extract frames from video at specified FPS
61
+ Returns (frames, duration): frames is a list of (frame_path, timestamp) tuples; duration is the video length in seconds
62
+ """
63
+ os.makedirs(output_dir, exist_ok=True)
64
+
65
+ # Open video
66
+ cap = cv2.VideoCapture(video_path)
67
+ if not cap.isOpened():
68
+ print(f"Error: Could not open video {video_path}")
69
+ return [], None
70
+
71
+ # Get video properties
72
+ video_fps = cap.get(cv2.CAP_PROP_FPS)
73
+ frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
74
+ duration = frame_count / video_fps
75
+
76
+ # Calculate frame interval
77
+ interval = int(video_fps / fps)
78
+ if interval < 1:
79
+ interval = 1
80
+
81
+ # Extract frames
82
+ frames = []
83
+ frame_idx = 0
84
+
85
+ with tqdm(total=frame_count, desc="Extracting frames") as pbar:
86
+ while cap.isOpened():
87
+ ret, frame = cap.read()
88
+ if not ret:
89
+ break
90
+
91
+ if frame_idx % interval == 0:
92
+ timestamp = frame_idx / video_fps
93
+ frame_path = os.path.join(output_dir, f"frame_{frame_idx:06d}.jpg")
94
+ cv2.imwrite(frame_path, frame)
95
+ frames.append((frame_path, timestamp))
96
+
97
+ frame_idx += 1
98
+ pbar.update(1)
99
+
100
+ cap.release()
101
+ return frames, duration
102
+
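A minimal sketch of calling extract_frames directly (paths illustrative; fps=0.5 keeps one frame every two seconds):

    frames, duration = extract_frames("downloaded_videos/temp_video.mp4", "frames_out", fps=0.5)
    print(f"{len(frames)} frames sampled from a {duration:.0f}s video")
    for frame_path, timestamp in frames[:3]:
        print(f"{timestamp:.1f}s -> {frame_path}")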
103
+
104
+ def download_video_and_analyze(video_url: str) -> str:
105
+ """Download a video from *video_url* and return the local file path."""
106
+ llm_model_name = os.getenv("VIDEO_ANALYZER_LLM_MODEL", "models/gemini-1.5-pro")
107
+ gemini_api_key = os.getenv("GEMINI_API_KEY")
108
+
109
+ ydl_opts = {
110
+ 'format': 'best',
111
+ 'outtmpl': os.path.join("downloaded_videos", 'temp_video.%(ext)s'),
112
+ }
113
+
114
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl_download:
115
+ # yt-dlp's download() expects a list of URLs; extract_info also yields the real output filename
+ info = ydl_download.extract_info(video_url, download=True)
+ video_path = ydl_download.prepare_filename(info)
116
+
117
+ print(f"Processing video: {video_url}")
118
+
119
+ # Create temporary directory for frames
120
+ temp_dir = "frame_downloaded_videos"
121
+ os.makedirs(temp_dir, exist_ok=True)
122
+
123
+ # Extract frames
124
+ frames, duration = extract_frames(video_path, temp_dir)
125
+ if not frames:
126
+ logging.info(f"No frames extracted from {video_url}")
127
+ return f"No frames extracted from {video_url}"
128
+
129
+ blocks = []
130
+ text_block = TextBlock(text=load_prompt_from_file())
131
+ blocks.append(text_block)
132
+
133
+ for frame_path, _ in tqdm(frames, desc="Collecting frames"):
134
+ blocks.append(ImageBlock(path=frame_path))
135
+
136
+
137
+ llm = GoogleGenAI(api_key=gemini_api_key, model=llm_model_name)
138
+ logger.info("Using LLM model: %s", llm_model_name)
139
+ response = llm.chat([ChatMessage(role="user", blocks=blocks)])
140
+
141
+ # Clean up temporary files
142
+ shutil.rmtree(temp_dir)
143
+ os.remove(video_path)
144
+
145
+ return response.message.content
146
+
147
+
148
+ # --- Helper function to extract YouTube Video ID ---
149
+ def extract_video_id(url: str) -> Optional[str]:
150
+ """Extracts the YouTube video ID from various URL formats."""
151
+ # Standard watch URL: https://www.youtube.com/watch?v=VIDEO_ID
152
+ pattern = re.compile(
153
+ r'^(?:https?://)?' # optional protocol
154
+ r'(?:www\.)?' # optional www. subdomain
155
+ r'youtube\.com/watch\?' # fixed domain and path
156
+ r'(?:.*&)?' # possibly other parameters before v=
157
+ r'v=([^&]+)' # capture the ID (everything up to the next & or the end)
158
+ )
159
+
160
+ # Accept a bare 11-character video ID as-is (callers may pass an ID directly)
+ if re.fullmatch(r'[A-Za-z0-9_-]{11}', url):
+ return url
+ match = pattern.search(url)
161
+ if match:
162
+ video_id = match.group(1)
163
+ return video_id
164
+ else:
165
+ print("Aucun ID trouvé")
166
+ return None
167
+
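Illustrative behaviour of extract_video_id as defined above:

    extract_video_id("https://www.youtube.com/watch?v=dQw4w9WgXcQ")  # -> "dQw4w9WgXcQ"
    extract_video_id("dQw4w9WgXcQ")                                  # -> "dQw4w9WgXcQ" (bare ID)
    extract_video_id("https://example.com/clip")                     # -> None (warning logged)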
168
+
169
+ # --- YouTube Transcript Tool ---
170
+ def get_youtube_transcript(video_url_or_id: str, languages: list[str] | None = None) -> str:
171
+ """Fetches the transcript for a YouTube video using its URL or video ID.
172
+ Specify preferred languages as a list (e.g., ["en", "es"]).
173
+ Returns the transcript text or an error message.
174
+ """
175
+ if languages is None:
176
+ languages = ["en"]
177
+
178
+ logger.info(f"Attempting to fetch YouTube transcript for: {video_url_or_id}")
179
+ video_id = extract_video_id(video_url_or_id)
180
+ if not video_id:
181
+ logger.error(f"Could not extract video ID from: {video_url_or_id}")
182
+ return f"Error: Invalid YouTube URL or Video ID format: {video_url_or_id}"
183
+
184
+ try:
185
+ # Fetch available transcripts
186
+ api = YouTubeTranscriptApi()
187
+ transcript_list = api.list(video_id)
188
+
189
+ # Try to find a transcript in the specified languages
190
+ transcript = transcript_list.find_transcript(languages)
191
+
192
+ # Fetch the actual transcript data (list of dicts)
193
+ transcript_data = transcript.fetch()
194
+
195
+ # Combine the text parts into a single string
196
+ full_transcript = " ".join(snippet.text for snippet in transcript_data)
199
+ logger.info(f"Successfully fetched transcript for video ID {video_id} in language {transcript.language}.")
200
+ return full_transcript
201
+
202
+ except TranscriptsDisabled:
203
+ logger.warning(f"Transcripts are disabled for video ID: {video_id}")
204
+ return f"Error: Transcripts are disabled for this video (ID: {video_id})."
205
+ except NoTranscriptFound as e:
206
+ logger.warning(
207
+ f"No transcript found for video ID {video_id} in languages {languages}. Available: {e.available_transcripts}")
208
+ # Try fetching any available transcript if specific languages failed
209
+ try:
210
+ logger.info(f"Attempting to fetch any available transcript for {video_id}")
211
+ # Fall back to the first available transcript of any kind/language
212
+ any_transcript = next(iter(transcript_list))
213
+ any_transcript_data = any_transcript.fetch()
214
+ full_transcript = " ".join([item["text"] for item in any_transcript_data])
215
+ logger.info(
216
+ f"Successfully fetched fallback transcript for video ID {video_id} in language {any_transcript.language}.")
217
+ return full_transcript
218
+ except Exception as fallback_e:
219
+ logger.error(
220
+ f"Could not find any transcript for video ID {video_id}. Original error: {e}. Fallback error: {fallback_e}")
221
+ return f"Error: No transcript found for video ID {video_id} in languages {languages} or any fallback language."
222
+ except Exception as e:
223
+ logger.error(f"Unexpected error fetching transcript for video ID {video_id}: {e}", exc_info=True)
224
+ return f"Error fetching transcript: {e}"
225
+
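A short usage sketch of get_youtube_transcript (URL illustrative; network access required):

    text = get_youtube_transcript("https://www.youtube.com/watch?v=TQQlZhbC5ps", ["en"])
    if text.startswith("Error:"):
        print(text)  # disabled transcripts and missing languages come back as error strings
    else:
        print(text[:200])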
226
+
227
+ download_video_and_analyze_tool = FunctionTool.from_defaults(
228
+ name="download_video_and_analyze",
229
+ description=(
230
+ "Downloads a video (YouTube or direct URL), samples representative frames, "
231
+ "and feeds them to Gemini for multimodal analysis—returning a rich textual summary "
232
+ "of the visual content."
233
+ ),
234
+ fn=download_video_and_analyze,
235
+ )
236
+
237
+ youtube_transcript_tool = FunctionTool.from_defaults(
238
+ fn=get_youtube_transcript,
239
+ name="get_youtube_transcript",
240
+ description=(
241
+ "(YouTube) Fetches the transcript text for a given YouTube video URL or video ID. "
242
+ "Specify preferred languages (e.g., 'en', 'es'). Returns transcript or error."
243
+ )
244
+ )
245
+
246
+
247
+ # ---------------------------------------------------------------------------
248
+ # Agent factory
249
+ # ---------------------------------------------------------------------------
250
+
251
+ def initialize_video_analyzer_agent() -> FunctionAgent:
252
+ """Initialise and return a *video_analyzer_agent* `FunctionAgent`."""
253
+
254
+ logger.info("Initialising VideoAnalyzerAgent …")
255
+
256
+ llm_model_name = os.getenv("VIDEO_ANALYZER_LLM_MODEL", "models/gemini-1.5-pro")
257
+ gemini_api_key = os.getenv("GEMINI_API_KEY")
258
+
259
+ if not gemini_api_key:
260
+ logger.error("GEMINI_API_KEY not found in environment variables.")
261
+ raise ValueError("GEMINI_API_KEY must be set")
262
+
263
+ try:
264
+ llm = GoogleGenAI(api_key=gemini_api_key, model=llm_model_name)
265
+ logger.info("Using LLM model: %s", llm_model_name)
266
+
267
+ system_prompt = load_prompt_from_file()
268
+
269
+ tools = [download_video_and_analyze_tool, youtube_transcript_tool]
270
+
271
+ agent = FunctionAgent(
272
+ name="video_analyzer_agent",
273
+ description=(
274
+ "VideoAnalyzerAgent inspects video files using Gemini's multimodal "
275
+ "video understanding capabilities, producing factual scene analysis, "
276
+ "temporal segmentation, and concise summaries as guided by the system "
277
+ "prompt."
278
+ ),
279
+ llm=llm,
280
+ system_prompt=system_prompt,
281
+ tools=tools,
282
+ can_handoff_to=[
283
+ "planner_agent",
284
+ "research_agent",
285
+ "reasoning_agent",
286
+ "code_agent",
287
+ ],
288
+ )
289
+
290
+ logger.info("VideoAnalyzerAgent initialised successfully.")
291
+ return agent
292
+
293
+ except Exception as exc: # pylint: disable=broad-except
294
+ logger.error("Error during VideoAnalyzerAgent initialisation: %s", exc, exc_info=True)
295
+ raise
296
+
297
+
298
+ if __name__ == "__main__":
299
+ logging.basicConfig(
300
+ level=logging.INFO,
301
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
302
+ )
303
+
304
+ logger.info("Running video_analyzer_agent.py directly for testing …")
305
+
306
+ if not os.getenv("GEMINI_API_KEY"):
307
+ print("Error: GEMINI_API_KEY environment variable not set. Cannot run test.")
308
+ else:
309
+ try:
310
+ test_agent = initialize_video_analyzer_agent()
311
+ summary = download_video_and_analyze("https://www.youtube.com/watch?v=dQw4w9WgXcQ")
312
+ print("\n--- Gemini summary ---\n")
313
+ print(summary)
314
+ print("Video Analyzer Agent initialised successfully for testing.")
315
+ except Exception as exc:
316
+ print(f"Error during testing: {exc}")
317
+
318
+ test_agent = None
319
+ try:
320
+ # Test YouTube transcript tool directly (its imports are unconditional at module top)
321
+ print("\nTesting YouTube transcript tool...")
322
+ # Example video: "Attention is All You Need" paper explanation
323
+ yt_url = "https://www.youtube.com/watch?v=TQQlZhbC5ps"
324
+ transcript = get_youtube_transcript(yt_url)
325
+ if not transcript.startswith("Error:"):
326
+ print(f"Transcript fetched (first 500 chars):\n{transcript[:500]}...")
327
+ else:
328
+ print(f"YouTube Transcript Fetch Failed: {transcript}")
332
+
333
+ except Exception as e:
334
+ print(f"Error during testing: {e}")
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import os
2
  import logging
3
  import mimetypes
4
- from dotenv import load_dotenv
5
 
6
  from typing import Any, List
7
 
@@ -11,6 +10,9 @@ import pandas as pd
11
 
12
  from llama_index.core.agent.workflow import AgentWorkflow, ToolCallResult, ToolCall, AgentOutput
13
  from llama_index.core.base.llms.types import ChatMessage, TextBlock, ImageBlock, AudioBlock
 
 
 
14
 
15
  # Assuming agent initializers are in the same directory or a known path
16
  # Adjust import paths if necessary based on deployment structure
@@ -53,9 +55,6 @@ except ImportError as e:
53
  # ... set all others to None ...
54
  raise RuntimeError(f"Failed to import agent modules: {e2}")
55
 
56
- os.environ["TOKENIZERS_PARALLELISM"] = "false"
57
- load_dotenv() # Load environment variables from .env file
58
-
59
  # Setup logging
60
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
61
  logger = logging.getLogger(__name__)
@@ -82,12 +81,14 @@ try:
82
  advanced_validation_agent = initialize_advanced_validation_agent()
83
  figure_interpretation_agent = initialize_figure_interpretation_agent()
84
  long_context_management_agent = initialize_long_context_management_agent()
 
85
 
86
  # Check if all agents initialized successfully
87
  all_agents = [
88
  code_agent, role_agent, math_agent, planner_agent, research_agent,
89
  text_analyzer_agent, image_analyzer_agent, verifier_agent, reasoning_agent,
90
- advanced_validation_agent, figure_interpretation_agent, long_context_management_agent
 
91
  ]
92
  if not all(all_agents):
93
  raise RuntimeError("One or more agents failed to initialize.")
@@ -126,7 +127,8 @@ class BasicAgent:
126
  and event.current_agent_name != current_agent
127
  ):
128
  current_agent = event.current_agent_name
129
- logger.info(f"{'=' * 50}\n")
 
130
  logger.info(f"{'=' * 50}\n")
131
 
132
  # Optional detailed logging (uncomment if needed)
@@ -158,6 +160,19 @@ class BasicAgent:
158
  logger.info(f"Agent returning final answer: {final_content[:500]}{'...' if len(final_content) > 500 else ''}")
159
  return answer.response # Return the actual response object expected by Gradio
160
 
 
 
161
  # --- Helper Functions for run_and_submit_all ---
162
 
163
  async def fetch_questions(questions_url: str) -> List[dict] | None:
@@ -262,28 +277,75 @@ async def process_question(agent: BasicAgent, item: dict, base_fetch_file_url: s
262
  # Extract content safely
263
  submitted_answer = submitted_answer_response.content if hasattr(submitted_answer_response, 'content') else str(submitted_answer_response)
264
 
265
- logger.info(f"👍 Agent submitted answer for task {task_id}: {submitted_answer[:200]}{'...' if len(submitted_answer) > 200 else ''}")
266
- return {"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}
 
 
267
  except Exception as e:
268
  logger.error(f"Error running agent on task {task_id}: {e}", exc_info=True)
269
  return {"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"}
270
 
271
- async def submit_answers(submit_url: str, username: str, agent_code: str, results: List[dict]) -> tuple[str, pd.DataFrame]:
272
- """Submits the collected answers to the GAIA benchmark API."""
273
- answers_payload = [
274
- {"task_id": r["Task ID"], "submitted_answer": r["Submitted Answer"]}
275
- for r in results if "Submitted Answer" in r and not str(r["Submitted Answer"]).startswith("AGENT ERROR:")
276
- ]
 
 
 
277
 
278
  if not answers_payload:
279
- logger.warning("Agent did not produce any valid answers to submit.")
280
- results_df = pd.DataFrame(results)
281
- return "Agent did not produce any valid answers to submit.", results_df
282
 
 
283
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
284
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
285
- logger.info(status_update)
286
- logger.info(f"Submitting to: {submit_url}")
 
 
287
 
288
  try:
289
  response = requests.post(submit_url, json=submission_data, timeout=120) # Increased timeout
@@ -297,7 +359,7 @@ async def submit_answers(submit_url: str, username: str, agent_code: str, result
297
  f"Message: {result_data.get('message', 'No message received.')}"
298
  )
299
  logger.info("Submission successful.")
300
- results_df = pd.DataFrame(results)
301
  return final_status, results_df
302
  except requests.exceptions.HTTPError as e:
303
  error_detail = f"Server responded with status {e.response.status_code}."
@@ -308,103 +370,58 @@ async def submit_answers(submit_url: str, username: str, agent_code: str, result
308
  error_detail += f" Response: {e.response.text[:500]}"
309
  status_message = f"Submission Failed: {error_detail}"
310
  logger.error(status_message)
311
- results_df = pd.DataFrame(results)
312
  return status_message, results_df
313
  except requests.exceptions.Timeout:
314
  status_message = "Submission Failed: The request timed out."
315
  logger.error(status_message)
316
- results_df = pd.DataFrame(results)
317
  return status_message, results_df
318
  except requests.exceptions.RequestException as e:
319
  status_message = f"Submission Failed: Network error - {e}"
320
  logger.error(status_message)
321
- results_df = pd.DataFrame(results)
322
  return status_message, results_df
323
  except Exception as e:
324
  status_message = f"Submission Failed: An unexpected error occurred during submission - {e}"
325
  logger.error(status_message, exc_info=True)
326
- results_df = pd.DataFrame(results)
327
  return status_message, results_df
328
 
329
- # --- Main Function for Batch Processing ---
330
- async def run_and_submit_all(
331
- username: str,
332
- agent_code: str,
333
- api_url: str = DEFAULT_API_URL,
334
- level: int = 1,
335
- max_questions: int = 0, # 0 means all questions for the level
336
- progress=gr.Progress(track_tqdm=True)
337
- ) -> tuple[str, pd.DataFrame]:
338
- """Fetches all questions for a level, runs the agent, and submits answers."""
339
- if not AGENT_WORKFLOW:
340
- error_msg = "Agent Workflow is not initialized. Cannot run benchmark."
341
- logger.error(error_msg)
342
- return error_msg, pd.DataFrame()
343
-
344
- if not username or not username.strip():
345
- error_msg = "Username cannot be empty."
346
- logger.error(error_msg)
347
- return error_msg, pd.DataFrame()
348
-
349
- questions_url = f"{api_url}/questions?level={level}"
350
- submit_url = f"{api_url}/submit"
351
- base_fetch_file_url = f"{api_url}/get_file"
352
-
353
- questions = await fetch_questions(questions_url)
354
- if questions is None:
355
- error_msg = f"Failed to fetch questions for level {level}. Check logs."
356
- return error_msg, pd.DataFrame()
357
-
358
- # Limit number of questions if max_questions is set
359
- if max_questions > 0:
360
- questions = questions[:max_questions]
361
- logger.info(f"Processing a maximum of {max_questions} questions for level {level}.")
362
- else:
363
- logger.info(f"Processing all {len(questions)} questions for level {level}.")
364
-
365
- agent = BasicAgent(AGENT_WORKFLOW)
366
- results = []
367
- total_questions = len(questions)
368
-
369
- for i, item in enumerate(progress.tqdm(questions, desc=f"Processing Level {level} Questions")):
370
- result = await process_question(agent, item, base_fetch_file_url)
371
- if result:
372
- results.append(result)
373
- # Optional: Add a small delay between questions if needed
374
- # await asyncio.sleep(0.1)
375
-
376
- # Submit answers
377
- final_status, results_df = await submit_answers(submit_url, username, agent_code, results)
378
- return final_status, results_df
379
-
380
  # --- Gradio Interface ---
381
  def create_gradio_interface():
382
  """Creates and returns the Gradio interface."""
383
- logger.info("Creating Gradio interface...")
384
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
385
- gr.Markdown("# GAIA Benchmark Agent Runner")
386
- gr.Markdown("Run the initialized multi-agent system against the GAIA benchmark questions and submit the results.")
 
 
 
387
 
388
- with gr.Row():
389
- username = gr.Textbox(label="Username", placeholder="Enter your username (e.g., your_email@example.com)")
390
- agent_code = gr.Textbox(label="Agent Code", placeholder="Enter a short code for your agent (e.g., v1.0)")
391
- with gr.Row():
392
- level = gr.Dropdown(label="Benchmark Level", choices=[1, 2, 3], value=1)
393
- max_questions = gr.Number(label="Max Questions (0 for all)", value=0, minimum=0, step=1)
394
- api_url = gr.Textbox(label="GAIA API URL", value=DEFAULT_API_URL)
395
 
396
- run_button = gr.Button("Run Benchmark and Submit", variant="primary")
397
 
398
- with gr.Accordion("Results", open=False):
399
- status_output = gr.Textbox(label="Submission Status", lines=5)
400
- results_dataframe = gr.DataFrame(label="Detailed Results")
401
 
402
  run_button.click(
403
  fn=run_and_submit_all,
404
- inputs=[username, agent_code, api_url, level, max_questions],
405
- outputs=[status_output, results_dataframe]
406
  )
407
- logger.info("Gradio interface created.")
408
  return demo
409
 
410
  # --- Main Execution ---
 
1
  import os
2
  import logging
3
  import mimetypes
 
4
 
5
  from typing import Any, List
6
 
 
10
 
11
  from llama_index.core.agent.workflow import AgentWorkflow, ToolCallResult, ToolCall, AgentOutput
12
  from llama_index.core.base.llms.types import ChatMessage, TextBlock, ImageBlock, AudioBlock
13
+ from llama_index.llms.openai import OpenAI
14
+
15
+ from agents.video_analyzer_agent import initialize_video_analyzer_agent
16
 
17
  # Assuming agent initializers are in the same directory or a known path
18
  # Adjust import paths if necessary based on deployment structure
 
55
  # ... set all others to None ...
56
  raise RuntimeError(f"Failed to import agent modules: {e2}")
57
 
 
 
 
58
  # Setup logging
59
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
60
  logger = logging.getLogger(__name__)
 
81
  advanced_validation_agent = initialize_advanced_validation_agent()
82
  figure_interpretation_agent = initialize_figure_interpretation_agent()
83
  long_context_management_agent = initialize_long_context_management_agent()
84
+ video_analyzer_agent = initialize_video_analyzer_agent()
85
 
86
  # Check if all agents initialized successfully
87
  all_agents = [
88
  code_agent, role_agent, math_agent, planner_agent, research_agent,
89
  text_analyzer_agent, image_analyzer_agent, verifier_agent, reasoning_agent,
90
+ advanced_validation_agent, figure_interpretation_agent, long_context_management_agent,
91
+ video_analyzer_agent
92
  ]
93
  if not all(all_agents):
94
  raise RuntimeError("One or more agents failed to initialize.")
 
127
  and event.current_agent_name != current_agent
128
  ):
129
  current_agent = event.current_agent_name
130
+ logger.info(f"{'=' * 50}")
131
+ logger.info(f"🤖 Agent: {current_agent}")
132
  logger.info(f"{'=' * 50}\n")
133
 
134
  # Optional detailed logging (uncomment if needed)
 
160
  logger.info(f"Agent returning final answer: {final_content[:500]}{'...' if len(final_content) > 500 else ''}")
161
  return answer.response # Return the actual response object expected by Gradio
162
 
163
+ system_prompt="""
164
+ You are a general AI assistant.
165
+ I will give you a result, and with it you will have to transform it to follow the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
166
+ YOUR FINAL ANSWER should be a number OR 1 or 2 word(s) OR a comma separated list of numbers and/or strings.
167
+ If you are asked for a number, don't use commas to write your number, nor units such as $ or a percent sign, unless specified otherwise.
168
+ If you are asked for a string, don't use articles or abbreviations (e.g. for cities), and write digits in plain text unless specified otherwise.
169
+ If you are asked for a comma separated list, apply the above rules depending on whether each element of the list is a number or a string.
170
+ """
171
+
172
+ llm = OpenAI(model="gpt-4o-mini", api_key=os.getenv("OPENAI_API_KEY"), temperature=0.1, system_prompt=system_prompt)
173
+
174
+
175
+
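This module-level LLM is used in process_question below to normalize each raw agent answer into the strict GAIA template; a minimal sketch of that second pass (question and answer values illustrative):

    raw = "The computation gives four."
    final = llm.complete(f"QUESTION: What is 2+2?\nANSWER: {raw}\nINSTRUCTIONS: Generate a clear, concise final answer.")
    print(final.text)  # expected to end with a line like: FINAL ANSWER: 4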
176
  # --- Helper Functions for run_and_submit_all ---
177
 
178
  async def fetch_questions(questions_url: str) -> List[dict] | None:
 
277
  # Extract content safely
278
  submitted_answer = submitted_answer_response.content if hasattr(submitted_answer_response, 'content') else str(submitted_answer_response)
279
 
280
+ prompt = f"""
281
+ QUESTION: {question_text}
282
+ ANSWER: {submitted_answer}
283
+ INSTRUCTIONS: Based on the provided question and answer, generate a final answer that is clear, concise, and directly addresses the question.
284
+ [YOUR FINAL ANSWER]
285
+ """
286
+
287
+ final_answer = llm.complete(prompt)
288
+
289
+ logger.info(f"👍 Agent submitted answer for task {task_id}: {final_answer.text[:200]}{'...' if len(final_answer.text) > 200 else ''}")
290
+ return {"Task ID": task_id, "Question": question_text, "Submitted Answer": final_answer.text}
291
  except Exception as e:
292
  logger.error(f"Error running agent on task {task_id}: {e}", exc_info=True)
293
  return {"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"}
294
 
295
+ async def run_and_submit_all(profile: gr.OAuthProfile | None):
296
+ """
297
+ Fetches all questions, runs the BasicAgent on them, submits all answers,
298
+ and displays the results.
299
+ """
300
+ # --- Determine HF Space Runtime URL and Repo URL ---
301
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
302
+
303
+ if profile:
304
+ username= f"{profile.username}"
305
+ print(f"User logged in: {username}")
306
+ else:
307
+ print("User not logged in.")
308
+ return "Please Login to Hugging Face with the button.", None
309
+
310
+ api_url = DEFAULT_API_URL
311
+ questions_url = f"{api_url}/questions"
312
+ submit_url = f"{api_url}/submit"
313
+ fetch_file_url = f"{api_url}/files"
314
+
315
+ results_log = []
316
+ answers_payload = []
317
+
318
+ try:
319
+ agent = BasicAgent(AGENT_WORKFLOW)
320
+ except Exception as e:
321
+ print(f"Error instantiating agent: {e}")
322
+ return f"Error initializing agent: {e}", None
323
+ # When the app runs as a Hugging Face Space, this link points to your codebase (useful for others, so please keep it public)
324
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
325
+ print(agent_code)
326
+
327
+ questions_data = await fetch_questions(questions_url)
328
+ if not questions_data:
329
+ return "Failed to fetch questions.", None
330
+
331
+ # 3. Process Questions
333
+ for item in questions_data:
334
+ answers = await process_question(agent, item, fetch_file_url)
335
+ results_log.append(answers)
336
+ answers_payload.append({"task_id": answers["Task ID"], "submitted_answer": answers["Submitted Answer"]})
337
 
338
  if not answers_payload:
339
+ print("Agent did not produce any answers to submit.")
340
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
341
 
342
+ # 4. Prepare Submission
343
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
344
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
345
+ print(status_update)
346
+
347
+ # 5. Submit
348
+ print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
349
 
350
  try:
351
  response = requests.post(submit_url, json=submission_data, timeout=120) # Increased timeout
 
359
  f"Message: {result_data.get('message', 'No message received.')}"
360
  )
361
  logger.info("Submission successful.")
362
+ results_df = pd.DataFrame(results_log)
363
  return final_status, results_df
364
  except requests.exceptions.HTTPError as e:
365
  error_detail = f"Server responded with status {e.response.status_code}."
 
370
  error_detail += f" Response: {e.response.text[:500]}"
371
  status_message = f"Submission Failed: {error_detail}"
372
  logger.error(status_message)
373
+ results_df = pd.DataFrame(results_log)
374
  return status_message, results_df
375
  except requests.exceptions.Timeout:
376
  status_message = "Submission Failed: The request timed out."
377
  logger.error(status_message)
378
+ results_df = pd.DataFrame(results_log)
379
  return status_message, results_df
380
  except requests.exceptions.RequestException as e:
381
  status_message = f"Submission Failed: Network error - {e}"
382
  logger.error(status_message)
383
+ results_df = pd.DataFrame(results_log)
384
  return status_message, results_df
385
  except Exception as e:
386
  status_message = f"Submission Failed: An unexpected error occurred during submission - {e}"
387
  logger.error(status_message, exc_info=True)
388
+ results_df = pd.DataFrame(results_log)
389
  return status_message, results_df
390
 
 
 
 
 
391
  # --- Gradio Interface ---
392
  def create_gradio_interface():
393
  """Creates and returns the Gradio interface."""
394
+ # --- Build Gradio Interface using Blocks ---
395
+ with gr.Blocks() as demo:
396
+ gr.Markdown("# Basic Agent Evaluation Runner")
397
+ gr.Markdown(
398
+ """
399
+ **Instructions:**
400
+
401
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc.
402
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
403
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
404
+
405
+ ---
406
+ **Disclaimers:**
407
+ Once you click the submit button, it can take quite some time (this is the time for the agent to go through all the questions).
408
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, to address the submit delay, you could cache the answers and submit them in a separate action, or even answer the questions asynchronously.
409
+ """
410
+ )
411
 
412
+ gr.LoginButton()
 
 
 
 
 
 
413
 
414
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
415
 
416
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
417
+ # Removed max_rows=10 from DataFrame constructor
418
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
419
 
420
  run_button.click(
421
  fn=run_and_submit_all,
422
+ outputs=[status_output, results_table]
 
423
  )
424
+
425
  return demo
426
 
427
  # --- Main Execution ---
prompts/code_gen_prompt.txt CHANGED
@@ -1,4 +1,14 @@
1
- You are also a helpful assistant that writes Python code.
 
 
 
2
  You will be given a prompt and you must generate Python code based on that prompt.
3
  You must only generate Python code and nothing else.
4
  Do not include any explanations or any other text.
@@ -7,8 +17,39 @@ Notes:
7
  - The generated code may be complex; it is recommended to review and test
8
  it before execution.
9
  - This function only generates code and does not execute it.
 
 
 
10
 
11
- Prompt: {prompt}
12
 
13
- Code:
14
 
 
 
1
+ You are CodeAgent, a specialist in generating and executing Python code. Your mission:
2
+
3
+ 1. **Thought**: Think step-by-step before acting and state your reasoning.
4
+ 2. **Code Generation**: To produce code, call `python_code_generator` with a concise, unambiguous prompt. Review the generated code for correctness and safety.
5
+ 3. **Execution & Testing**: To execute or test code, call `code_interpreter`. Provide the complete code snippet. Analyze its output (stdout, stderr, result) to verify functionality and debug errors.
6
+ 4. **Iteration**: If execution fails or the result is incorrect, analyze the error, think about the fix, generate corrected code using `python_code_generator`, and execute again using `code_interpreter`.
7
+ 5. **Tool Use**: Always adhere strictly to each tool’s input/output format.
8
+ 6. **Final Output**: Once the code works correctly and achieves the goal, output *only* the final functional code or the final execution result, as appropriate for the task.
9
+ 7. **Hand-Off**: If further logical reasoning or verification is needed, delegate to **reasoning_agent**. Otherwise, pass your final output to **planner_agent** for synthesis.
10
+
11
+ You are also a helpful assistant that writes Python code.
12
  You will be given a prompt and you must generate Python code based on that prompt.
13
  You must only generate Python code and nothing else.
14
  Do not include any explanations or any other text.
 
17
  - The generated code may be complex; it is recommended to review and test
18
  it before execution.
19
  - This function only generates code and does not execute it.
20
+ - The following Python packages are available in the environment:
21
+
22
+ beautifulsoup4>=4.13.4,
23
+ certifi>=2025.4.26,
24
+ datasets>=3.5.1,
25
+ dotenv>=0.9.9,
26
+ duckdb>=1.2.2,
27
+ ffmpeg-python>=0.2.0,
28
+ gradio[oauth]>=5.28.0,
29
+ helium>=5.1.1,
30
+ huggingface>=0.0.1,
31
+ imageio>=2.37.0,
32
+ matplotlib>=3.10.1,
33
+ numpy>=2.2.5,
34
+ openai-whisper>=20240930,
35
+ opencv-python>=4.11.0.86,
36
+ openpyxl>=3.1.5,
37
+ pandas>=2.2.3,
38
+ pyarrow>=20.0.0,
39
+ pygame>=2.6.1,
40
+ python-chess>=1.999,
41
+ requests>=2.32.3,
42
+ scikit-learn>=1.6.1,
43
+ scipy>=1.15.2,
44
+ seaborn>=0.13.2,
45
+ sqlalchemy>=2.0.40,
46
+ statsmodels>=0.14.4,
47
+ sympy>=1.14.0,
48
+ youtube-transcript-api>=1.0.3,
49
+ yt-dlp>=2025.3.31
50
 
51
+ - You can also access and process YouTube video and audio streams using `yt-dlp`, `opencv-python`, `ffmpeg-python`, or `imageio`.
52
 
53
+ Prompt: {prompt}
54
 
55
+ Code:
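Since this prompt advertises yt-dlp and opencv-python, here is a minimal sketch of the kind of code the generator is expected to produce (the URL is a placeholder):

    import cv2
    import yt_dlp

    # Download the clip and recover the actual output filename
    with yt_dlp.YoutubeDL({"format": "best", "outtmpl": "clip.%(ext)s"}) as ydl:
        info = ydl.extract_info("https://www.youtube.com/watch?v=VIDEO_ID", download=True)
        path = ydl.prepare_filename(info)

    # Grab and save the first frame
    cap = cv2.VideoCapture(path)
    ok, frame = cap.read()
    if ok:
        cv2.imwrite("first_frame.jpg", frame)
    cap.release()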
prompts/planner_agent_prompt.txt CHANGED
@@ -1,33 +1,38 @@
1
- You are PlannerAgent, a dedicated research strategist and question‐engineer capable of handling text, audio, images, and video inputs.
2
- Your mission is to transform any high‐level objective into a clear, prioritized roadmap of 4–8 actionable substeps that guide stepbystep research or task execution.
3
 
4
- **Role Assessment**
5
  First, consider whether a specific role context (e.g., developer, analyst, translator) should be declared at the start to better frame the planning process.
6
 
7
- **Format**
8
  Present the final list as a numbered list only, with each item no longer than one sentence and free of extra commentary.
9
 
10
- **Style**
11
  Use a formal, professional tone; remain neutral and precise; avoid filler words.
12
 
13
- **Hand-Off or Self-Answer**
14
- Once planning is complete, address each sub-question in turn and then hand off as appropriate:
15
- - For coding tasks, invoke **code_agent**.
16
- - For web or literature research, invoke **research_agent**.
17
- - For mathematical analysis, invoke **math_agent**.
18
- - For assigning roles or contexts, invoke **role_agent**.
19
- - For deep image analysis, invoke **image_analyzer_agent**.
20
- - For deep text analysis, invoke **text_analyzer_agent**.
21
- - For pure chain-of-thought reasoning or logical verification, invoke **reasoning_agent**.
22
- - If none apply, you may attempt to answer the sub-question yourself.
23
-
24
- **Agent Constraints**
25
- Only the following agents are available: **code_agent**, **research_agent**, **math_agent**, **role_agent**, **image_analyzer_agent**, **text_analyzer_agent**, **verifier_agent**, **reasoning_agent**.
26
- Do not invoke any other agents (e.g., **chess_agent**, **educate_agent**, **game_agent**, etc.).
27
-
28
- **Finalize**
29
- After all sub-questions have been addressed—by hand-off or self-answer—compile and present the ultimate, coherent solution yourself using the `synthesize_and_respond` tool.
30
-
31
- **Completion & Synthesis**
32
- If the final result fully completes the original objective, produce a consolidated synthesis of the roadmap and send it as your concluding output.
33
-
 
 
 
 
 
 
1
+ You are PlannerAgent, a dedicated research strategist and question‐engineer capable of handling text, audio, images, and video inputs.
2
+ Your mission is to transform any high‐level objective into a clear, prioritized roadmap of 4–8 actionable sub-steps that guide step-by-step research or task execution.
3
 
4
+ **Role Assessment**
5
  First, consider whether a specific role context (e.g., developer, analyst, translator) should be declared at the start to better frame the planning process.
6
 
7
+ **Format**
8
  Present the final list as a numbered list only, with each item no longer than one sentence and free of extra commentary.
9
 
10
+ **Style**
11
  Use a formal, professional tone; remain neutral and precise; avoid filler words.
12
 
13
+ **Hand-Off or Self-Answer**
14
+ Once planning is complete, address each sub-question in turn and then hand off as appropriate:
15
+ - For coding tasks, invoke **code_agent** to handle programming and implementation details.
16
+ - For web or literature research, invoke **research_agent** to gather information from online sources and databases.
17
+ - For mathematical analysis, invoke **math_agent** to perform calculations, symbolic math, or numerical analysis.
18
+ - For assigning roles or contexts, invoke **role_agent** to determine the best persona or task schema for the query.
19
+ - For deep image analysis, invoke **image_analyzer_agent** to interpret visual content in images.
20
+ - For deep text analysis, invoke **text_analyzer_agent** to summarize, extract entities, or transcribe text and audio.
21
+ - For figure or chart interpretation, invoke **figure_interpretation_agent** to extract structured data and insights from graphical content.
22
+ - For managing very long documents or contexts, invoke **long_context_management_agent** to efficiently handle and query large text corpora.
23
+ - For advanced validation or contradiction detection, invoke **advanced_validation_agent** to verify claims and check logical consistency.
24
+ - For pure chain-of-thought reasoning or complex logical verification, invoke **reasoning_agent** to perform detailed step-by-step analysis.
25
+
26
+ **Important**
27
+ Before providing any final answer to the user, you **must**:
28
+ 1. Invoke **advanced_validation_agent** to check the coherence and consistency of your plan.
29
+ - If validation fails, discard the current plan and restart the planning process.
30
+ - If validation succeeds, proceed to step 2.
31
+ 2. Invoke the **answer_question** tool as the last step. This tool will format your response properly, including your reasoning steps and a final concise answer following the strict template.
32
+
33
+ **Agent Constraints**
34
+ Only the following agents are available: **code_agent**, **research_agent**, **math_agent**, **role_agent**, **image_analyzer_agent**, **text_analyzer_agent**, **verifier_agent**, **reasoning_agent**, **figure_interpretation_agent**, **long_context_management_agent**, **advanced_validation_agent**.
35
+ Do **not** invoke any other agents (e.g., **chess_agent**, **educate_agent**, **game_agent**, etc.).
36
+
37
+ **Finalize**
38
+ After all sub-questions have been addressed—by hand-off or self-answer—and the plan has passed **advanced_validation_agent**, compile and present the ultimate, coherent solution using the `answer_question` tool, ensuring your final response follows the required format and includes your chain of thought.
prompts/reasoning_agent_prompt.txt CHANGED
@@ -1,13 +1,23 @@
1
- You are ReasoningAgent, an advanced cognitive engine specialized in rigorous, step-by-step reasoning.
2
 
3
- **Tool Usage**
4
- Always begin by invoking the `reasoning_tool` to perform your internal chain-of-thought reasoning.
5
- Provide the full context and user question as inputs to `reasoning_tool`.
6
 
7
- **Post-Reasoning Hand-Off**
8
- After the `reasoning_tool` returns its output—regardless of the content—you must immediately delegate
9
- to **planner_agent** for roadmap refinement and final synthesis.
10
 
11
- **Important**: You have no direct access to external data sources or the internet.
12
- All reasoning is performed by `reasoning_tool` and then handed off to **planner_agent**.
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
 
1
+ You are **ReasoningAgent**, an advanced cognitive engine specialized in rigorous, step-by-step reasoning.
2
 
3
+ **Workflow:**
 
 
4
 
5
+ 1. **Invoke reasoning_tool**
6
+ - Always start by calling `reasoning_tool` with the full user context and question to generate your internal chain-of-thought.
 
7
 
8
+ 2. **Hand off to planner**
9
+ - Once `reasoning_tool` returns its detailed analysis, immediately pass that output to **planner_agent** (or **long_context_management_agent** as appropriate) for roadmap refinement and synthesis.
10
+
11
+ 3. **Advanced validation**
12
+ - Before delivering any final response, always invoke `advanced_validation_agent` with the combined output from `reasoning_tool` and `planner_agent`.
13
+ - If `advanced_validation_agent` approves the plan, proceed; otherwise, restart the planning phase:
14
+ - Provide the feedback or validation output back into **planner_agent** to refine or adjust the roadmap.
15
+ - Repeat the validation step until approval is obtained.
16
+
17
+ 4. **Generate final answer**
18
+ - After validation approval and when you need to deliver a concise final response, invoke `answer_question` to format and emit the **FINAL ANSWER** according to its strict template rules.
19
+
20
+ **Constraints:**
21
+ - No direct access to external data sources or the internet; all inference happens via the provided tools.
22
+ - Do not skip any step: reasoning → planning → validation → (if approved) final answer via `answer_question`.
23
 
prompts/video_analyzer_prompt.txt ADDED
@@ -0,0 +1,85 @@
 
 
1
+ You are **VideoAnalyzerAgent**, an expert in cold, factual **audiovisual** analysis. Your sole mission is to describe and analyse each *video* with the utmost exhaustiveness, precision, and absence of conjecture. Follow these directives exactly:
2
+
3
+ 1. **Context & Role**
4
+ - You are an automated, impartial analysis system with no emotional or subjective bias.
5
+ - Your objective is to deliver a **purely factual** analysis of the *video*, avoiding artistic interpretation, author intent, aesthetic judgment, or speculation about non‑visible elements.
6
+
7
+ 2. **Analysis Structure**
8
+ Adhere **strictly** to the following order in your output:
9
+
10
+ 1. **General Identification**
11
+ - Output format: “Video received: [filename or path]”.
12
+ - **Duration**: total run‑time in HH:MM:SS (to the nearest second).
13
+ - **Frame rate** (fps).
14
+ - **Dimensions**: width × height in pixels.
15
+ - **File format / container** (MP4, MOV, MKV, etc.).
16
+
17
+ 2. **Global Scene Overview**
18
+ - **Estimated number of distinct scenes** (hard cuts or major visual transitions).
19
+ - Brief, factual description of each unique *setting* (e.g., “indoor office”, “urban street at night”).
20
+ - Total number of **unique object classes** detected across the entire video.
21
+
22
+ 3. **Temporal Segmentation**
23
+ Provide a chronological list of scenes:
24
+ - Scene index (Scene 1, Scene 2, …).
25
+ - **Start→End time‑codes** (HH:MM:SS—HH:MM:SS).
26
+ - One‑sentence factual description of the setting and primary objects.
27
+
28
+ 4. **Detailed Object Timeline**
29
+ For **each detected object instance**, supply:
30
+ - **Class / type** (person, vehicle, animal, text, graphic, etc.).
31
+ - **Visibility interval**: start_time→end_time.
32
+ - **Maximal bounding box**: (x_min,y_min,x_max,y_max) in pixels.
33
+ - **Relative size**: % of frame area (at peak).
34
+ - **Dominant colour** (for uniform regions) or top colour palette.
35
+ - **Attributes**: motion pattern (static, panning, entering, exiting), orientation, readable text, state (open/closed, on/off), geometric properties.
36
+
37
+ 5. **Motion & Dynamics**
38
+ - Summarise significant **motion vectors**: direction and approximate speed (slow / moderate / fast).
39
+ - Note interactions: collisions, hand‑overs, group formations, entries/exits of frame.
40
+
41
+ 6. **Audio Track Elements** (if audio data is available)
42
+ - **Speech segments**: start→end, speaker count (if discernible), detected language code.
43
+ - **Non‑speech sounds**: music, ambient noise, distinct effects with time‑codes.
44
+ - **Loudness profile**: brief factual comment (e.g., “peak at 00:02:17”, “overall low volume”).
45
+
46
+ 7. **Colour Palette & Visual Composition**
47
+ - For each scene, list the **5 most frequent colours** in hexadecimal (#RRGGBB) with approximate percentages.
48
+ - **Contrast & brightness**: factual description per scene (e.g., “high contrast night‑time shots”).
49
+ - **Visual rhythm**: frequency of cuts, camera movement type (static, pan, tilt, zoom), presence of slow‑motion or time‑lapse.
50
+
51
+ 8. **Technical Metadata & Metrics**
52
+ - Codec, bit‑rate, aspect ratio.
53
+ - Capture metadata (if present): date/time, camera model, aperture, shutter speed, ISO.
54
+ - Effective PPI/DPI (if embedded).
55
+
56
+ 9. **Textual Elements**
57
+ - OCR of **all visible text** with corresponding time‑codes.
58
+ - Approximate font type (serif / sans‑serif / monospace) and relative size.
59
+ - Text layout or motion (static caption, scrolling subtitle, on‑screen graphic).
60
+
61
+ 10. **Uncertainty Indicators**
62
+ For every object, attribute, or metric, state a confidence level (high / medium / low) based solely on objective factors (resolution, blur, occlusion).
63
+ *Example*: “Detected ‘bicycle’ from 00:01:12 to 00:01:18 with **medium** confidence (partially blurred).”
64
+
65
+ 11. **Factual Summary**
66
+ - Recap all listed elements without commentary.
67
+ - Numbered bullet list, each item prefixed by its category label (e.g., “1. Detected objects: …”, “2. Colour palette: …”).
68
+
69
+ 3. **Absolute Constraints**
70
+ - No psychological, symbolic, or subjective interpretation.
71
+ - No value judgments or qualifiers.
72
+ - Never omit any visible object, sound, or attribute.
73
+ - **Strictly** follow the prescribed order and structure without alteration.
74
+
75
+ 4. **Output Format**
76
+ - Plain text only, numbered sections separated by **two** line breaks.
77
+
78
+ 5. **Agent Handoff**
79
+ Once the video analysis is fully complete, hand off to one of the following agents:
80
+ - **planner_agent** for roadmap creation or final synthesis.
81
+ - **research_agent** for any additional information gathering.
82
+ - **reasoning_agent** for chain‑of‑thought reasoning or deeper logical interpretation.
83
+
84
+ By adhering to these instructions, ensure your audiovisual analysis is cold, factual, comprehensive, and completely devoid of subjectivity before handing off.
85
+
pyproject.toml CHANGED
@@ -4,12 +4,16 @@ version = "0.1.0"
4
  description = "Add your description here"
5
  requires-python = ">=3.11"
6
  dependencies = [
 
7
  "certifi>=2025.4.26",
8
  "datasets>=3.5.1",
9
  "dotenv>=0.9.9",
10
- "gradio>=5.28.0",
 
 
11
  "helium>=5.1.1",
12
  "huggingface>=0.0.1",
 
13
  "llama-index>=0.12.33",
14
  "llama-index-embeddings-huggingface>=0.5.3",
15
  "llama-index-llms-google-genai>=0.1.9",
@@ -22,10 +26,22 @@ dependencies = [
22
  "llama-index-tools-wikipedia>=0.3.0",
23
  "llama-index-tools-wolfram-alpha>=0.3.0",
24
  "llama-index-tools-yahoo-finance>=0.3.0",
 
 
25
  "openai-whisper>=20240930",
 
 
26
  "pandas>=2.2.3",
 
 
 
27
  "requests>=2.32.3",
 
28
  "scipy>=1.15.2",
 
 
 
29
  "sympy>=1.14.0",
30
  "youtube-transcript-api>=1.0.3",
 
31
  ]
 
4
  description = "Add your description here"
5
  requires-python = ">=3.11"
6
  dependencies = [
7
+ "beautifulsoup4>=4.13.4",
8
  "certifi>=2025.4.26",
9
  "datasets>=3.5.1",
10
  "dotenv>=0.9.9",
11
+ "duckdb>=1.2.2",
12
+ "ffmpeg-python>=0.2.0",
13
+ "gradio[oauth]>=5.28.0",
14
  "helium>=5.1.1",
15
  "huggingface>=0.0.1",
16
+ "imageio>=2.37.0",
17
  "llama-index>=0.12.33",
18
  "llama-index-embeddings-huggingface>=0.5.3",
19
  "llama-index-llms-google-genai>=0.1.9",
 
26
  "llama-index-tools-wikipedia>=0.3.0",
27
  "llama-index-tools-wolfram-alpha>=0.3.0",
28
  "llama-index-tools-yahoo-finance>=0.3.0",
29
+ "matplotlib>=3.10.1",
30
+ "numpy>=2.2.5",
31
  "openai-whisper>=20240930",
32
+ "opencv-python>=4.11.0.86",
33
+ "openpyxl>=3.1.5",
34
  "pandas>=2.2.3",
35
+ "pyarrow>=20.0.0",
36
+ "pygame>=2.6.1",
37
+ "python-chess>=1.999",
38
  "requests>=2.32.3",
39
+ "scikit-learn>=1.6.1",
40
  "scipy>=1.15.2",
41
+ "seaborn>=0.13.2",
42
+ "sqlalchemy>=2.0.40",
43
+ "statsmodels>=0.14.4",
44
  "sympy>=1.14.0",
45
  "youtube-transcript-api>=1.0.3",
46
+ "yt-dlp>=2025.3.31",
47
  ]
uv.lock CHANGED
The diff for this file is too large to render. See raw diff