Update agent.py
Browse files
agent.py
CHANGED
@@ -159,17 +159,34 @@ read_and_parse_tool = FunctionTool.from_defaults(
|
|
159 |
)
|
160 |
)
|
161 |
|
162 |
-
def create_rag_tool_fn(documents: List[Document]) -> QueryEngineTool:
|
163 |
"""
|
164 |
-
Creates a RAG query engine tool from
|
165 |
-
|
166 |
-
|
|
|
|
|
167 |
Args:
|
168 |
-
documents: A list of LlamaIndex Document objects from
|
169 |
-
|
|
|
|
|
|
|
|
|
170 |
Returns:
|
171 |
-
|
172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
173 |
if not documents:
|
174 |
return None
|
175 |
|
@@ -215,6 +232,10 @@ def create_rag_tool_fn(documents: List[Document]) -> QueryEngineTool:
|
|
215 |
"The input is a natural language question about the documents' content."
|
216 |
)
|
217 |
)
|
|
|
|
|
|
|
|
|
218 |
|
219 |
return rag_engine_tool
|
220 |
|
@@ -222,13 +243,14 @@ create_rag_tool = FunctionTool.from_defaults(
|
|
222 |
fn=create_rag_tool_fn,
|
223 |
name="create_rag_tool",
|
224 |
description=(
|
225 |
-
"Use this tool to
|
226 |
-
"Input
|
227 |
-
"
|
228 |
-
"
|
|
|
|
|
229 |
)
|
230 |
)
|
231 |
-
|
232 |
# 1. Create the base DuckDuckGo search tool from the official spec.
|
233 |
# This tool returns text summaries of search results, not just URLs.
|
234 |
base_duckduckgo_tool = DuckDuckGoSearchToolSpec().to_tool_list()[1]
|
@@ -295,15 +317,16 @@ def create_forced_rag_pipeline():
|
|
295 |
forced_rag_pipeline = create_forced_rag_pipeline()
|
296 |
|
297 |
# Replace the individual tools with the pipeline
|
298 |
-
|
299 |
fn=lambda input_path: forced_rag_pipeline.run(input_path),
|
300 |
name="process_docs_urls_tool",
|
301 |
description=(
|
302 |
-
"This is the PRIMARY and
|
303 |
-
"It AUTOMATICALLY processes
|
304 |
-
"
|
305 |
-
"
|
306 |
-
"
|
|
|
307 |
)
|
308 |
)
|
309 |
|
|
|
159 |
)
|
160 |
)
|
161 |
|
162 |
+
def create_rag_tool_fn(documents: List[Document], query: str = None) -> Union[QueryEngineTool, str]:
|
163 |
"""
|
164 |
+
Creates a RAG query engine tool from documents with advanced indexing and querying capabilities.
|
165 |
+
|
166 |
+
This function implements a sophisticated RAG pipeline using hierarchical or sentence-window parsing
|
167 |
+
depending on document count, vector indexing, and reranking for optimal information retrieval.
|
168 |
+
|
169 |
Args:
|
170 |
+
documents (List[Document]): A list of LlamaIndex Document objects from read_and_parse_tool.
|
171 |
+
Must not be empty to create a valid RAG engine.
|
172 |
+
query (str, optional): If provided, immediately queries the created RAG engine and returns
|
173 |
+
the answer as a string. If None, returns the QueryEngineTool for later use.
|
174 |
+
Defaults to None.
|
175 |
+
|
176 |
Returns:
|
177 |
+
Union[QueryEngineTool, str, None]:
|
178 |
+
- QueryEngineTool: When query=None, returns a tool configured for agent use with
|
179 |
+
advanced reranking and similarity search capabilities.
|
180 |
+
- str: When query is provided, returns the direct answer from the RAG engine.
|
181 |
+
- None: When documents list is empty.
|
182 |
+
|
183 |
+
Examples:
|
184 |
+
Create a RAG tool for later use:
|
185 |
+
>>> rag_tool = create_rag_tool_fn(documents)
|
186 |
+
|
187 |
+
Get immediate answer from documents:
|
188 |
+
>>> answer = create_rag_tool_fn(documents, query="What is the main topic?")
|
189 |
+
"""
|
190 |
if not documents:
|
191 |
return None
|
192 |
|
|
|
232 |
"The input is a natural language question about the documents' content."
|
233 |
)
|
234 |
)
|
235 |
+
|
236 |
+
if query :
|
237 |
+
result = rag_engine_tool.query_engine.query(query)
|
238 |
+
return str(result)
|
239 |
|
240 |
return rag_engine_tool
|
241 |
|
|
|
243 |
fn=create_rag_tool_fn,
|
244 |
name="create_rag_tool",
|
245 |
description=(
|
246 |
+
"Use this tool to build a Retrieval Augmented Generation (RAG) engine from documents AND optionally query it immediately. "
|
247 |
+
"Input: documents (list of documents or paths) and optional query parameter. "
|
248 |
+
"If no query is provided: creates and returns a RAG query engine tool for later use. "
|
249 |
+
"If query is provided: creates the RAG engine AND immediately returns the answer to your question. "
|
250 |
+
"This dual-mode tool enables both RAG engine creation and direct question-answering in one step. "
|
251 |
+
"Use with query parameter when you want immediate answers from documents, or without query to create a reusable engine."
|
252 |
)
|
253 |
)
|
|
|
254 |
# 1. Create the base DuckDuckGo search tool from the official spec.
|
255 |
# This tool returns text summaries of search results, not just URLs.
|
256 |
base_duckduckgo_tool = DuckDuckGoSearchToolSpec().to_tool_list()[1]
|
|
|
317 |
forced_rag_pipeline = create_forced_rag_pipeline()
|
318 |
|
319 |
# Replace the individual tools with the pipeline
|
320 |
+
information_retrieval_tool = FunctionTool.from_defaults(
|
321 |
fn=lambda input_path: forced_rag_pipeline.run(input_path),
|
322 |
name="process_docs_urls_tool",
|
323 |
description=(
|
324 |
+
"This tool is the PRIMARY and MOST EFFECTIVE method for extracting and retrieving information from URLs or documents. "
|
325 |
+
"It AUTOMATICALLY processes any given web pages, PDFs, or document files by first using read_and_parse to fully extract and parse content. "
|
326 |
+
"Then, it creates a powerful Retrieval Augmented Generation (RAG) query engine optimized for semantic search and precise information retrieval. "
|
327 |
+
"Finally, it applies the RAG engine to answer queries directly, providing efficient and accurate results. "
|
328 |
+
"This tool is specifically designed to handle diverse document types and web content, ensuring superior extraction and querying capabilities. "
|
329 |
+
"Avoid manual page access or ad-hoc parsing; always use this tool for best performance and reliability in information extraction and question answering."
|
330 |
)
|
331 |
)
|
332 |
|