Coool2 committed on
Commit
8687bfb
·
verified ·
1 Parent(s): 35a713b

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +42 -19
agent.py CHANGED
@@ -159,17 +159,34 @@ read_and_parse_tool = FunctionTool.from_defaults(
159
  )
160
  )
161
 
162
- def create_rag_tool_fn(documents: List[Document]) -> QueryEngineTool:
163
  """
164
- Creates a RAG query engine tool from a list of documents using advanced components.
165
- Inspired by 'create_advanced_index' and 'create_context_aware_query_engine' methods.
166
-
 
 
167
  Args:
168
- documents: A list of LlamaIndex Document objects from the read_and_parse_tool.
169
-
 
 
 
 
170
  Returns:
171
- A QueryEngineTool configured for the agent to use in the current task.
172
- """
 
 
 
 
 
 
 
 
 
 
 
173
  if not documents:
174
  return None
175
 
@@ -215,6 +232,10 @@ def create_rag_tool_fn(documents: List[Document]) -> QueryEngineTool:
215
  "The input is a natural language question about the documents' content."
216
  )
217
  )
 
 
 
 
218
 
219
  return rag_engine_tool
220
 
@@ -222,13 +243,14 @@ create_rag_tool = FunctionTool.from_defaults(
222
  fn=create_rag_tool_fn,
223
  name="create_rag_tool",
224
  description=(
225
- "Use this tool to create a Retrieval Augmented Generation (RAG) engine from a set of documents. "
226
- "Input should be a list of documents or document paths. The tool processes these documents to build a vector index "
227
- "and a query engine that enables natural language querying over the document content. "
228
- "This tool is essential for enabling efficient and context-aware information retrieval in complex document collections."
 
 
229
  )
230
  )
231
-
232
  # 1. Create the base DuckDuckGo search tool from the official spec.
233
  # This tool returns text summaries of search results, not just URLs.
234
  base_duckduckgo_tool = DuckDuckGoSearchToolSpec().to_tool_list()[1]
@@ -295,15 +317,16 @@ def create_forced_rag_pipeline():
295
  forced_rag_pipeline = create_forced_rag_pipeline()
296
 
297
  # Replace the individual tools with the pipeline
298
- process_docs_urls_tool = FunctionTool.from_defaults(
299
  fn=lambda input_path: forced_rag_pipeline.run(input_path),
300
  name="process_docs_urls_tool",
301
  description=(
302
- "This is the PRIMARY and BEST tool to extract required information from URLs or documents. "
303
- "It AUTOMATICALLY processes documents or URLs with read_and_parse for complete content extraction and parsing, "
304
- "then creates a RAG query engine optimized for information retrieval. This tool is specifically designed to "
305
- "handle web pages, PDFs, documents and extract specific information efficiently. "
306
- "DO NOT try to access pages manually - use this tool instead for superior information extraction and querying capabilities."
 
307
  )
308
  )
309
 
 
159
  )
160
  )
161
 
162
+ def create_rag_tool_fn(documents: List[Document], query: str = None) -> Union[QueryEngineTool, str]:
163
  """
164
+ Creates a RAG query engine tool from documents with advanced indexing and querying capabilities.
165
+
166
+ This function implements a sophisticated RAG pipeline using hierarchical or sentence-window parsing
167
+ depending on document count, vector indexing, and reranking for optimal information retrieval.
168
+
169
  Args:
170
+ documents (List[Document]): A list of LlamaIndex Document objects from read_and_parse_tool.
171
+ Must not be empty to create a valid RAG engine.
172
+ query (str, optional): If provided, immediately queries the created RAG engine and returns
173
+ the answer as a string. If None, returns the QueryEngineTool for later use.
174
+ Defaults to None.
175
+
176
  Returns:
177
+ Union[QueryEngineTool, str]:
178
+ - QueryEngineTool: When query=None, returns a tool configured for agent use with
179
+ advanced reranking and similarity search capabilities.
180
+ - str: When query is provided, returns the direct answer from the RAG engine.
181
+ - None: When documents list is empty.
182
+
183
+ Examples:
184
+ Create a RAG tool for later use:
185
+ >>> rag_tool = create_rag_tool_fn(documents)
186
+
187
+ Get immediate answer from documents:
188
+ >>> answer = create_rag_tool_fn(documents, query="What is the main topic?")
189
+ """
190
  if not documents:
191
  return None
192
 
 
232
  "The input is a natural language question about the documents' content."
233
  )
234
  )
235
+
236
+ if query :
237
+ result = rag_engine_tool.query_engine.query(query)
238
+ return str(result)
239
 
240
  return rag_engine_tool
241
 
 
243
  fn=create_rag_tool_fn,
244
  name="create_rag_tool",
245
  description=(
246
+ "Use this tool to build a Retrieval Augmented Generation (RAG) engine from documents AND optionally query it immediately. "
247
+ "Input: documents (list of documents or paths) and optional query parameter. "
248
+ "If no query is provided: creates and returns a RAG query engine tool for later use. "
249
+ "If query is provided: creates the RAG engine AND immediately returns the answer to your question. "
250
+ "This dual-mode tool enables both RAG engine creation and direct question-answering in one step. "
251
+ "Use with query parameter when you want immediate answers from documents, or without query to create a reusable engine."
252
  )
253
  )
 
254
  # 1. Create the base DuckDuckGo search tool from the official spec.
255
  # This tool returns text summaries of search results, not just URLs.
256
  base_duckduckgo_tool = DuckDuckGoSearchToolSpec().to_tool_list()[1]
 
317
  forced_rag_pipeline = create_forced_rag_pipeline()
318
 
319
  # Replace the individual tools with the pipeline
320
+ information_retrieval_tool = FunctionTool.from_defaults(
321
  fn=lambda input_path: forced_rag_pipeline.run(input_path),
322
  name="process_docs_urls_tool",
323
  description=(
324
+ "This tool is the PRIMARY and MOST EFFECTIVE method for extracting and retrieving information from URLs or documents. "
325
+ "It AUTOMATICALLY processes any given web pages, PDFs, or document files by first using read_and_parse to fully extract and parse content. "
326
+ "Then, it creates a powerful Retrieval Augmented Generation (RAG) query engine optimized for semantic search and precise information retrieval. "
327
+ "Finally, it applies the RAG engine to answer queries directly, providing efficient and accurate results. "
328
+ "This tool is specifically designed to handle diverse document types and web content, ensuring superior extraction and querying capabilities. "
329
+ "Avoid manual page access or ad-hoc parsing; always use this tool for best performance and reliability in information extraction and question answering."
330
  )
331
  )
332