Coool2 committed on
Commit
a397179
·
verified ·
1 Parent(s): 4bdbe47

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +14 -13
agent.py CHANGED
@@ -41,8 +41,7 @@ from llama_index.readers.file import (
41
  PDFReader,
42
  DocxReader,
43
  CSVReader,
44
- PandasExcelReader,
45
- )
46
  from typing import List, Union
47
  from llama_index.core import VectorStoreIndex, Document, Settings
48
  from llama_index.core.tools import QueryEngineTool
@@ -57,13 +56,6 @@ import sys
57
  import weave
58
  weave.init("gaia-llamaindex-agents")
59
 
60
- # Keep only the debug handler
61
- llama_debug = LlamaDebugHandler(print_trace_on_end=True)
62
- callback_manager = CallbackManager([llama_debug])
63
-
64
- logging.basicConfig(level=logging.INFO)
65
- logging.getLogger("llama_index.core.agent").setLevel(logging.DEBUG)
66
- logging.getLogger("llama_index.llms").setLevel(logging.DEBUG)
67
 
68
  def get_max_memory_config(max_memory_per_gpu):
69
  """Generate max_memory config for available GPUs"""
@@ -105,6 +97,12 @@ embed_model = HuggingFaceEmbedding(
105
  "low_cpu_mem_usage": True, # Still get memory optimization
106
  }
107
  )
 
 
 
 
 
 
108
 
109
  Settings.llm = proj_llm
110
  Settings.embed_model = embed_model
@@ -304,8 +302,9 @@ class DynamicQueryEngineManager:
304
  hybrid_reranker = HybridReranker()
305
 
306
  query_engine = index.as_query_engine(
307
- similarity_top_k=10,
308
  node_postprocessors=[hybrid_reranker],
 
309
  )
310
 
311
  self.query_engine_tool = QueryEngineTool.from_defaults(
@@ -351,18 +350,20 @@ def search_and_extract_content_from_url(query: str) -> List[Document]:
351
 
352
  try:
353
  # Check if it's a YouTube URL
354
- if "youtube" in urlparse(url).netloc:
355
  loader = YoutubeTranscriptReader()
356
  documents = loader.load_data(youtubelinks=[url])
357
  else:
358
  loader = TrafilaturaWebReader()
359
  documents = loader.load_data(urls=[url])
 
 
 
 
360
  except Exception as e:
361
  # Handle any exceptions that occur during content extraction
362
  return [Document(text=f"Error extracting content from URL: {str(e)}")]
363
 
364
- return documents
365
-
366
  def enhanced_web_search_and_update(query: str) -> str:
367
  """
368
  Performs web search, extracts content, and adds it to the dynamic query engine.
 
41
  PDFReader,
42
  DocxReader,
43
  CSVReader,
44
+ PandasExcelReader)
 
45
  from typing import List, Union
46
  from llama_index.core import VectorStoreIndex, Document, Settings
47
  from llama_index.core.tools import QueryEngineTool
 
56
  import weave
57
  weave.init("gaia-llamaindex-agents")
58
 
 
 
 
 
 
 
 
59
 
60
  def get_max_memory_config(max_memory_per_gpu):
61
  """Generate max_memory config for available GPUs"""
 
97
  "low_cpu_mem_usage": True, # Still get memory optimization
98
  }
99
  )
100
+ llama_debug = LlamaDebugHandler(print_trace_on_end=True)
101
+ callback_manager = CallbackManager([llama_debug])
102
+
103
+ logging.basicConfig(level=logging.INFO)
104
+ logging.getLogger("llama_index.core.agent").setLevel(logging.DEBUG)
105
+ logging.getLogger("llama_index.llms").setLevel(logging.DEBUG)
106
 
107
  Settings.llm = proj_llm
108
  Settings.embed_model = embed_model
 
302
  hybrid_reranker = HybridReranker()
303
 
304
  query_engine = index.as_query_engine(
305
+ similarity_top_k=20,
306
  node_postprocessors=[hybrid_reranker],
307
+ response_mode="tree_summarize"
308
  )
309
 
310
  self.query_engine_tool = QueryEngineTool.from_defaults(
 
350
 
351
  try:
352
  # Check if it's a YouTube URL
353
+ if "youtube" in urlparse(url).netloc or "youtu.be" in urlparse(url).netloc:
354
  loader = YoutubeTranscriptReader()
355
  documents = loader.load_data(youtubelinks=[url])
356
  else:
357
  loader = TrafilaturaWebReader()
358
  documents = loader.load_data(urls=[url])
359
+ for doc in documents:
360
+ doc.metadata["source"] = url
361
+ doc.metadata["type"] = "web_text"
362
+ return documents
363
  except Exception as e:
364
  # Handle any exceptions that occur during content extraction
365
  return [Document(text=f"Error extracting content from URL: {str(e)}")]
366
 
 
 
367
  def enhanced_web_search_and_update(query: str) -> str:
368
  """
369
  Performs web search, extracts content, and adds it to the dynamic query engine.