Coool2 committed
Commit 6395478 · verified · Parent: a5e7aff

Rename tools.py to agent.py

Files changed (2)
  1. agent.py +492 -0
  2. tools.py +0 -0
agent.py ADDED
@@ -0,0 +1,492 @@
from llama_index.core.agent.workflow import FunctionAgent
from llama_index.core.tools import FunctionTool
from llama_index.core import VectorStoreIndex, Document
from llama_index.core.node_parser import SentenceWindowNodeParser, HierarchicalNodeParser
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceInferenceAPI
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.readers.file import PDFReader, DocxReader, CSVReader, ImageReader
import os
from typing import List, Dict, Any

# LLM definitions
multimodal_llm = HuggingFaceInferenceAPI(
    model_name="microsoft/Phi-3.5-vision-instruct",
    token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
)

# Text LLM for routing, research, and code reasoning
text_llm = HuggingFaceInferenceAPI(
    model_name="Qwen/Qwen2.5-72B-Instruct",
    token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
)

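# Optional smoke test (a sketch; assumes HUGGINGFACEHUB_API_TOKEN is set and
# both models are reachable through the HF Inference API):
#
#     print(text_llm.complete("Reply with the single word: ready").text)
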
class EnhancedRAGQueryEngine:
    def __init__(self, task_context: str = ""):
        self.task_context = task_context
        self.embed_model = HuggingFaceEmbedding("BAAI/bge-small-en-v1.5")
        self.reranker = SentenceTransformerRerank(model="cross-encoder/ms-marco-MiniLM-L-2-v2", top_n=5)

        # File-type specific readers; .txt falls through to a plain-text loader
        self.readers = {
            '.pdf': PDFReader(),
            '.docx': DocxReader(),
            '.doc': DocxReader(),
            '.csv': CSVReader(),
            '.txt': lambda file_path: [Document(text=open(file_path, 'r', encoding='utf-8').read())],
            '.jpg': ImageReader(),
            '.jpeg': ImageReader(),
            '.png': ImageReader()
        }

        self.sentence_window_parser = SentenceWindowNodeParser.from_defaults(
            window_size=3,
            window_metadata_key="window",
            original_text_metadata_key="original_text"
        )

        self.hierarchical_parser = HierarchicalNodeParser.from_defaults(
            chunk_sizes=[2048, 512, 128]
        )

    def load_and_process_documents(self, file_paths: List[str]) -> List[Document]:
        documents = []

        for file_path in file_paths:
            file_ext = os.path.splitext(file_path)[1].lower()

            try:
                if file_ext in self.readers:
                    reader = self.readers[file_ext]
                    if callable(reader):
                        docs = reader(file_path)
                    else:
                        docs = reader.load_data(file=file_path)

                    # Add metadata to all documents
                    for doc in docs:
                        doc.metadata.update({
                            "file_path": file_path,
                            "file_type": file_ext[1:],
                            "task_context": self.task_context
                        })
                    documents.extend(docs)

            except Exception as e:
                # Fallback to plain-text reading
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        content = f.read()
                    documents.append(Document(
                        text=content,
                        metadata={"file_path": file_path, "file_type": "text", "error": str(e)}
                    ))
                except Exception:
                    print(f"Failed to process {file_path}: {e}")

        return documents

    def create_advanced_index(self, documents: List[Document], use_hierarchical: bool = False) -> VectorStoreIndex:
        # Large collections get hierarchical chunking; small ones use sentence windows
        if use_hierarchical or len(documents) > 10:
            nodes = self.hierarchical_parser.get_nodes_from_documents(documents)
        else:
            nodes = self.sentence_window_parser.get_nodes_from_documents(documents)

        index = VectorStoreIndex(
            nodes,
            embed_model=self.embed_model
        )

        return index

    def create_context_aware_query_engine(self, index: VectorStoreIndex):
        retriever = VectorIndexRetriever(
            index=index,
            similarity_top_k=10,
            embed_model=self.embed_model
        )

        # from_args wires the LLM into a response synthesizer; the bare
        # constructor does not accept an llm keyword
        query_engine = RetrieverQueryEngine.from_args(
            retriever=retriever,
            node_postprocessors=[self.reranker],
            llm=multimodal_llm
        )

        return query_engine

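# Typical flow for the engine above (a sketch; file names are illustrative):
#
#     engine = EnhancedRAGQueryEngine(task_context="GAIA document QA")
#     docs = engine.load_and_process_documents(["report.pdf", "data.csv"])
#     index = engine.create_advanced_index(docs)
#     query_engine = engine.create_context_aware_query_engine(index)
#     print(query_engine.query("Summarize the key figures").response)
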
def comprehensive_rag_analysis(file_paths: List[str], query: str, task_context: str = "") -> str:
    try:
        rag_engine = EnhancedRAGQueryEngine(task_context)
        documents = rag_engine.load_and_process_documents(file_paths)

        if not documents:
            return "No documents could be processed successfully."

        # Switch to hierarchical chunking for large corpora
        total_text_length = sum(len(doc.text) for doc in documents)
        use_hierarchical = total_text_length > 50000 or len(documents) > 5

        index = rag_engine.create_advanced_index(documents, use_hierarchical)
        query_engine = rag_engine.create_context_aware_query_engine(index)

        enhanced_query = f"""
        Task Context: {task_context}
        Original Query: {query}

        Please analyze the provided documents and answer the query with precise, factual information.
        """

        response = query_engine.query(enhanced_query)

        result = "**RAG Analysis Results:**\n\n"
        result += f"**Documents Processed:** {len(documents)}\n"
        result += f"**Answer:**\n{response.response}\n\n"

        return result

    except Exception as e:
        return f"RAG analysis failed: {str(e)}"

def cross_document_analysis(file_paths: List[str], query: str, task_context: str = "") -> str:
    try:
        rag_engine = EnhancedRAGQueryEngine(task_context)
        all_documents = []
        document_groups = {}

        # Load each file separately so documents can be tagged with their source group
        for file_path in file_paths:
            docs = rag_engine.load_and_process_documents([file_path])
            doc_key = os.path.basename(file_path)
            document_groups[doc_key] = docs

            for doc in docs:
                doc.metadata.update({
                    "document_group": doc_key,
                    "total_documents": len(file_paths)
                })
            all_documents.extend(docs)

        index = rag_engine.create_advanced_index(all_documents, use_hierarchical=True)
        query_engine = rag_engine.create_context_aware_query_engine(index)

        response = query_engine.query(f"Task: {task_context}\nQuery: {query}")

        result = "**Cross-Document Analysis:**\n"
        result += f"**Documents:** {list(document_groups.keys())}\n"
        result += f"**Answer:**\n{response.response}\n"

        return result

    except Exception as e:
        return f"Cross-document analysis failed: {str(e)}"

# Create tools (identifier-style names keep function-calling schemas valid)
enhanced_rag_tool = FunctionTool.from_defaults(
    fn=comprehensive_rag_analysis,
    name="enhanced_rag_analysis",
    description="Comprehensive document analysis using advanced RAG with hybrid search and context-aware processing"
)

cross_document_tool = FunctionTool.from_defaults(
    fn=cross_document_analysis,
    name="cross_document_analysis",
    description="Advanced analysis across multiple documents with cross-referencing capabilities"
)

# Analysis Agent
analysis_agent = FunctionAgent(
    name="AnalysisAgent",
    description="Advanced multimodal analysis using enhanced RAG with hybrid search and cross-document capabilities",
    system_prompt="""
    You are an advanced analysis specialist with access to:
    - Enhanced RAG with hybrid search and reranking
    - Multi-format document processing (PDF, Word, CSV, images, text)
    - Cross-document analysis and synthesis
    - Context-aware query processing

    Your capabilities:
    1. Process multiple file types simultaneously
    2. Perform semantic search across document collections
    3. Cross-reference information between documents
    4. Extract precise information with source attribution
    5. Handle both text and visual content analysis

    Always consider the GAIA task context and provide precise, well-sourced answers.
    """,
    llm=multimodal_llm,
    tools=[enhanced_rag_tool, cross_document_tool],
    can_handoff_to=["CodeAgent", "ResearchAgent"]
)

from llama_index.readers.web import SimpleWebPageReader
from llama_index.core.tools.ondemand_loader_tool import OnDemandLoaderTool
from llama_index.tools.arxiv import ArxivToolSpec
import duckduckgo_search as ddg
import re

class IntelligentSourceRouter:
    def __init__(self):
        # Initialize tools - only ArXiv and web search
        # (max_results is a constructor argument on the spec, not a query argument)
        self.arxiv_spec = ArxivToolSpec(max_results=3)

        # Add web content loader
        self.web_reader = SimpleWebPageReader()

        # Create OnDemandLoaderTool for web content
        self.web_loader_tool = OnDemandLoaderTool.from_defaults(
            self.web_reader,
            name="web_content_loader",
            description="Load and analyze web page content with intelligent chunking and search"
        )

    def web_search_fallback(self, query: str, max_results: int = 5) -> str:
        try:
            results = ddg.DDGS().text(query, max_results=max_results)
            return "\n".join([f"{i}. **{r['title']}**\n   URL: {r['href']}\n   {r['body']}" for i, r in enumerate(results, 1)])
        except Exception as e:
            return f"Search failed: {str(e)}"

    def extract_web_content(self, urls: List[str], query: str) -> str:
        """Extract and analyze content from web URLs"""
        try:
            content_results = []
            for url in urls[:3]:  # Limit to top 3 URLs
                try:
                    result = self.web_loader_tool.call(
                        urls=[url],
                        query=f"Extract information relevant to: {query}"
                    )
                    content_results.append(f"**Content from {url}:**\n{result}")
                except Exception as e:
                    content_results.append(f"**Failed to load {url}**: {str(e)}")

            return "\n\n".join(content_results)
        except Exception as e:
            return f"Content extraction failed: {str(e)}"

    def detect_intent_and_route(self, query: str) -> str:
        # Simple LLM-based discrimination: scientific vs non-scientific
        intent_prompt = f"""
        Analyze this query and determine if it's scientific research or general information:
        Query: "{query}"

        Choose ONE source:
        - arxiv: For scientific research, academic papers, technical studies, algorithms, experiments
        - web_search: For all other information (current events, general facts, weather, how-to guides, etc.)

        Respond with ONLY "arxiv" or "web_search".
        """

        response = text_llm.complete(intent_prompt)
        # Tolerate extra tokens around the label the LLM was asked to emit
        selected_source = "arxiv" if "arxiv" in response.text.strip().lower() else "web_search"

        # Execute search and extract content
        results = [f"**Query**: {query}", f"**Selected Source**: {selected_source}", "=" * 50]

        try:
            if selected_source == 'arxiv':
                result = self.arxiv_spec.to_tool_list()[0].call(query=query)
                results.append(f"**ArXiv Research:**\n{result}")

            else:  # Default to web_search for everything else
                # Get search results
                search_results = self.web_search_fallback(query, 5)
                results.append(f"**Web Search Results:**\n{search_results}")

                # Extract URLs and load content
                urls = re.findall(r'URL: (https?://[^\s]+)', search_results)
                if urls:
                    web_content = self.extract_web_content(urls, query)
                    results.append(f"**Extracted Web Content:**\n{web_content}")

        except Exception as e:
            results.append(f"**Search failed**: {str(e)}")

        return "\n\n".join(results)

# Initialize router
intelligent_router = IntelligentSourceRouter()

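# Illustrative routing behavior (the actual choice is made by text_llm at runtime):
#
#     intelligent_router.detect_intent_and_route("attention mechanisms in transformers")
#     # -> likely routed to arxiv
#     intelligent_router.detect_intent_and_route("current weather in Paris")
#     # -> routed to web_search, with content extracted from the top URLs
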
# Create enhanced research tool
def enhanced_smart_research_tool(query: str, task_context: str = "", max_results: int = 5) -> str:
    full_query = f"{query} {task_context}".strip()
    return intelligent_router.detect_intent_and_route(full_query)

enhanced_research_tool_func = FunctionTool.from_defaults(
    fn=enhanced_smart_research_tool,
    name="enhanced_research_tool",
    description="Intelligent research tool that discriminates between scientific (ArXiv) and general (web) research with deep content extraction"
)

# Research agent with automatic source routing
research_agent = FunctionAgent(
    name="ResearchAgent",
    description="Advanced research agent that automatically routes between scientific and general research sources",
    system_prompt="""
    You are an advanced research specialist that automatically discriminates between:

    **Scientific Research** → ArXiv
    - Academic papers, research studies
    - Technical algorithms and methods
    - Scientific experiments and theories

    **General Research** → Web Search with Content Extraction
    - Current events and news
    - General factual information
    - How-to guides and technical documentation
    - Weather, locations, biographical info

    You automatically:
    1. **Route queries** to the most appropriate source
    2. **Extract deep content** from web pages (not just snippets)
    3. **Analyze and synthesize** information comprehensively
    4. **Provide detailed answers** with source attribution

    Always focus on extracting the most relevant information for the GAIA task.
    """,
    llm=text_llm,
    tools=[enhanced_research_tool_func],
    can_handoff_to=["AnalysisAgent", "CodeAgent"]
)

from llama_index.core.agent.workflow import ReActAgent

def execute_python_code(code: str) -> str:
    try:
        # Restricted execution environment: a small builtin whitelist plus
        # pre-imported math/datetime/re modules
        safe_globals = {
            "__builtins__": {
                "len": len, "str": str, "int": int, "float": float,
                "list": list, "dict": dict, "sum": sum, "max": max, "min": min,
                "round": round, "abs": abs, "sorted": sorted
            },
            "math": __import__("math"),
            "datetime": __import__("datetime"),
            "re": __import__("re")
        }

        exec_locals = {}
        exec(code, safe_globals, exec_locals)

        # By convention, the snippet communicates its output via a `result` variable
        if 'result' in exec_locals:
            return str(exec_locals['result'])
        else:
            return "Code executed successfully"

    except Exception as e:
        return f"Code execution failed: {str(e)}"

code_execution_tool = FunctionTool.from_defaults(
    fn=execute_python_code,
    name="execute_python_code",
    description="Execute Python code safely for calculations and data processing"
)

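# Example: snippets assign to `result` to return a value, and the pre-imported
# modules (math, datetime, re) are usable without import statements:
#
#     execute_python_code("result = max(3, 7) + math.sqrt(16)")
#     # -> "11.0"
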
# Code Agent as ReActAgent
code_agent = ReActAgent(
    name="CodeAgent",
    description="Advanced calculations, data processing, and final answer synthesis using ReAct reasoning",
    system_prompt="""
    You are a coding and reasoning specialist using ReAct methodology.

    For each task:
    1. THINK: Analyze what needs to be calculated or processed
    2. ACT: Execute appropriate code or calculations
    3. OBSERVE: Review results and determine if more work is needed
    4. REPEAT: Continue until you have the final answer

    Always show your reasoning process clearly and provide exact answers as required by GAIA.
    """,
    llm=text_llm,
    tools=[code_execution_tool],
    can_handoff_to=["ResearchAgent", "AnalysisAgent"]
)

class TaskRouter:
    def __init__(self):
        self.agents = {
            "AnalysisAgent": analysis_agent,
            "ResearchAgent": research_agent,
            "CodeAgent": code_agent
        }

    def route_task(self, question_data: Dict[str, Any]) -> str:
        question = question_data.get("Question", "").lower()
        has_files = "file_name" in question_data

        # Keyword-based routing: files with document/visual cues go to analysis,
        # numeric work to code, open questions to research
        if has_files:
            if any(keyword in question for keyword in ["image", "chart", "graph", "picture", "pdf", "document", "csv"]):
                return "AnalysisAgent"

        if any(keyword in question for keyword in ["calculate", "compute", "math", "number", "formula"]):
            return "CodeAgent"

        if any(keyword in question for keyword in ["search", "find", "who", "what", "when", "where", "research"]):
            return "ResearchAgent"

        return "AnalysisAgent"  # Default

    def get_agent(self, agent_name: str):
        return self.agents.get(agent_name, self.agents["AnalysisAgent"])

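# Illustrative routing decisions (GAIA-style payloads, made up here):
#
#     router = TaskRouter()
#     router.route_task({"Question": "Calculate the average of column B"})
#     # -> "CodeAgent"
#     router.route_task({"Question": "What is shown in the chart?", "file_name": "sales.png"})
#     # -> "AnalysisAgent"
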
class EnhancedGAIAAgent:
    def __init__(self):
        self.router = TaskRouter()

        # Main ReActAgent that coordinates everything
        self.main_agent = ReActAgent(
            name="MainGAIAAgent",
            description="Main GAIA agent that coordinates research, analysis, and computation to solve complex questions",
            system_prompt="""
            You are the main GAIA agent coordinator using ReAct reasoning methodology.

            Your process:
            1. THINK: Analyze the GAIA question and determine what information/analysis is needed
            2. ACT: Delegate to appropriate specialist agents (Research, Analysis, Code)
            3. OBSERVE: Review the results from specialist agents
            4. THINK: Determine if you have enough information for a final answer
            5. ACT: Either request more information or provide the final answer

            Available specialist agents:
            - ResearchAgent: For web search, ArXiv research
            - AnalysisAgent: For document/image analysis using RAG
            - CodeAgent: For calculations and data processing

            Always provide precise, exact answers as required by GAIA format.
            """,
            llm=text_llm,
            tools=[
                enhanced_research_tool_func,
                enhanced_rag_tool,
                cross_document_tool,
                code_execution_tool
            ]
        )

    async def solve_gaia_question(self, question_data: Dict[str, Any]) -> str:
        question = question_data.get("Question", "")
        task_id = question_data.get("task_id", "")

        # Prepare comprehensive context
        context_prompt = f"""
        GAIA Task ID: {task_id}
        Question: {question}

        {'Associated files: ' + question_data.get('file_name', '') if 'file_name' in question_data else 'No files provided'}

        Instructions:
        1. Analyze this GAIA question carefully using ReAct reasoning
        2. Determine what information, analysis, or calculations are needed
        3. Use appropriate tools to gather information and perform analysis
        4. Synthesize findings into a precise, exact answer
        5. Ensure your answer format matches GAIA requirements (exact, concise)

        Begin your ReAct reasoning process now.
        """

        # Workflow agents are driven through an awaitable run() call, not chat()
        response = await self.main_agent.run(user_msg=context_prompt)

        return str(response)
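
# Minimal driver sketch (assumes the llama_index agent-workflow API where
# ReActAgent.run(...) is awaitable; the task payload below is illustrative):
if __name__ == "__main__":
    import asyncio

    async def _demo():
        agent = EnhancedGAIAAgent()
        answer = await agent.solve_gaia_question({
            "task_id": "demo-001",
            "Question": "What is 17 squared minus 12 squared?",
        })
        print(answer)

    asyncio.run(_demo())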
tools.py DELETED
File without changes