Update agent.py
agent.py CHANGED
@@ -41,8 +41,7 @@ from llama_index.readers.file import (
     PDFReader,
     DocxReader,
     CSVReader,
-    PandasExcelReader
-)
+    PandasExcelReader)
 from typing import List, Union
 from llama_index.core import VectorStoreIndex, Document, Settings
 from llama_index.core.tools import QueryEngineTool
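Note: these reader classes ship with the llama-index-readers-file package. A minimal sketch of the shared pattern, using a hypothetical local file, in case the import list looks unfamiliar:

    from pathlib import Path
    from llama_index.readers.file import PDFReader

    # Hypothetical file path; DocxReader, CSVReader, and PandasExcelReader
    # follow the same load_data() -> list[Document] pattern.
    reader = PDFReader()
    docs = reader.load_data(file=Path("report.pdf"))
    print(len(docs), docs[0].metadata)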
@@ -57,13 +56,6 @@ import sys
 import weave
 weave.init("gaia-llamaindex-agents")
 
-# Keep only the debug handler
-llama_debug = LlamaDebugHandler(print_trace_on_end=True)
-callback_manager = CallbackManager([llama_debug])
-
-logging.basicConfig(level=logging.INFO)
-logging.getLogger("llama_index.core.agent").setLevel(logging.DEBUG)
-logging.getLogger("llama_index.llms").setLevel(logging.DEBUG)
 
 def get_max_memory_config(max_memory_per_gpu):
     """Generate max_memory config for available GPUs"""
@@ -105,6 +97,12 @@ embed_model = HuggingFaceEmbedding(
         "low_cpu_mem_usage": True, # Still get memory optimization
     }
 )
+llama_debug = LlamaDebugHandler(print_trace_on_end=True)
+callback_manager = CallbackManager([llama_debug])
+
+logging.basicConfig(level=logging.INFO)
+logging.getLogger("llama_index.core.agent").setLevel(logging.DEBUG)
+logging.getLogger("llama_index.llms").setLevel(logging.DEBUG)
 
 Settings.llm = proj_llm
 Settings.embed_model = embed_model
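Note: this commit moves the debug handler and logging setup below the embedding-model construction. The hunks shown create callback_manager but never attach it, so it is presumably wired up elsewhere in agent.py. A minimal sketch of how it would typically be registered globally (an assumption, not something this diff does):

    from llama_index.core import Settings
    from llama_index.core.callbacks import CallbackManager, LlamaDebugHandler

    llama_debug = LlamaDebugHandler(print_trace_on_end=True)
    # Assumption: registering globally via Settings; the diff only constructs the manager.
    Settings.callback_manager = CallbackManager([llama_debug])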
@@ -304,8 +302,9 @@ class DynamicQueryEngineManager:
         hybrid_reranker = HybridReranker()
 
         query_engine = index.as_query_engine(
-            similarity_top_k=
+            similarity_top_k=20,
             node_postprocessors=[hybrid_reranker],
+            response_mode="tree_summarize"
         )
 
         self.query_engine_tool = QueryEngineTool.from_defaults(
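Note: the old line similarity_top_k= had no value, so this hunk fixes a syntax error as well as adding tree_summarize response synthesis. A rough sketch of the resulting call with a toy index (HybridReranker is custom to this repo and omitted here; assumes Settings.llm and Settings.embed_model are configured as above):

    from llama_index.core import Document, VectorStoreIndex

    # Toy one-document index standing in for the real one.
    index = VectorStoreIndex.from_documents([Document(text="example text")])
    query_engine = index.as_query_engine(
        similarity_top_k=20,             # retrieve up to 20 candidate nodes
        response_mode="tree_summarize",  # summarize retrieved nodes hierarchically
    )
    print(query_engine.query("What is this text about?"))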
@@ -351,18 +350,20 @@ def search_and_extract_content_from_url(query: str) -> List[Document]:
 
     try:
         # Check if it's a YouTube URL
-        if "youtube" in urlparse(url).netloc:
+        if "youtube" in urlparse(url).netloc or "youtu.be" in urlparse(url).netloc:
             loader = YoutubeTranscriptReader()
             documents = loader.load_data(youtubelinks=[url])
         else:
             loader = TrafilaturaWebReader()
             documents = loader.load_data(urls=[url])
+        for doc in documents:
+            doc.metadata["source"] = url
+            doc.metadata["type"] = "web_text"
+        return documents
     except Exception as e:
         # Handle any exceptions that occur during content extraction
         return [Document(text=f"Error extracting content from URL: {str(e)}")]
 
-    return documents
-
 def enhanced_web_search_and_update(query: str) -> str:
     """
     Performs web search, extracts content, and adds it to the dynamic query engine.
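Note: this hunk extends the YouTube check to youtu.be short links, tags each extracted Document with source and type metadata, and moves the success-path return inside the try block. The URL dispatch is easy to sanity-check in isolation; a self-contained sketch of the same predicate:

    from urllib.parse import urlparse

    def is_youtube(url: str) -> bool:
        # Mirrors the diff's check: youtube.com domains or the youtu.be short form.
        netloc = urlparse(url).netloc
        return "youtube" in netloc or "youtu.be" in netloc

    assert is_youtube("https://www.youtube.com/watch?v=abc123")
    assert is_youtube("https://youtu.be/abc123")
    assert not is_youtube("https://example.com/watch")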