ragunath-ravi committed on
Commit
02fc469
·
verified ·
1 Parent(s): 2655c55

Update app.py

Files changed (1)
  1. app.py +397 -578
app.py CHANGED
@@ -1,145 +1,133 @@
1
  import gradio as gr
2
  import os
3
- import tempfile
4
- import uuid
5
- from datetime import datetime
6
- from typing import List, Dict, Any, Optional
7
  import json
 
8
  import asyncio
9
- from dataclasses import dataclass, asdict
 
10
  import logging
11
 
12
- # Document processing imports
13
- import PyPDF2
14
- import pandas as pd
15
- from docx import Document
16
- from pptx import Presentation
17
- import markdown
18
-
19
- # ML/AI imports
20
  from langchain.text_splitter import RecursiveCharacterTextSplitter
21
  from langchain.embeddings import HuggingFaceEmbeddings
22
  from langchain.vectorstores import FAISS
23
- from langchain.schema import Document as LCDocument
24
- from huggingface_hub import InferenceClient
25
 
26
- # Setup logging
27
  logging.basicConfig(level=logging.INFO)
28
  logger = logging.getLogger(__name__)
29
 
30
- # Get HF token from environment
31
- HF_TOKEN = os.getenv('hf_token')
32
 
33
- # MCP Message Structure
34
- @dataclass
35
  class MCPMessage:
36
- sender: str
37
- receiver: str
38
- type: str
39
- trace_id: str
40
- payload: Dict[str, Any]
41
- timestamp: str = None
42
-
43
- def __post_init__(self):
44
- if self.timestamp is None:
45
- self.timestamp = datetime.now().isoformat()
46
 
47
  def to_dict(self):
48
- return asdict(self)
49
-
50
- # MCP Communication Layer
51
- class MCPCommunicator:
52
  def __init__(self):
53
- self.message_queue = asyncio.Queue()
54
  self.subscribers = {}
55
 
56
- async def send_message(self, message: MCPMessage):
57
- logger.info(f"MCP: {message.sender} -> {message.receiver}: {message.type}")
58
- await self.message_queue.put(message)
59
 
60
- async def receive_message(self, agent_name: str) -> MCPMessage:
61
- while True:
62
- message = await self.message_queue.get()
63
- if message.receiver == agent_name:
64
- return message
65
- # Re-queue if not for this agent
66
- await self.message_queue.put(message)
67
 
68
- # Global MCP instance
69
- mcp = MCPCommunicator()
70
 
71
- # Base Agent Class
72
- class BaseAgent:
73
- def __init__(self, name: str):
74
- self.name = name
75
- self.mcp = mcp
76
-
77
- async def send_mcp_message(self, receiver: str, msg_type: str, payload: Dict[str, Any], trace_id: str):
78
- message = MCPMessage(
79
- sender=self.name,
80
- receiver=receiver,
81
- type=msg_type,
82
- trace_id=trace_id,
83
- payload=payload
84
- )
85
- await self.mcp.send_message(message)
86
 
87
- async def receive_mcp_message(self) -> MCPMessage:
88
- return await self.mcp.receive_message(self.name)
89
-
90
- # Document Ingestion Agent
91
- class IngestionAgent(BaseAgent):
92
- def __init__(self):
93
- super().__init__("IngestionAgent")
94
  self.text_splitter = RecursiveCharacterTextSplitter(
95
  chunk_size=1000,
96
- chunk_overlap=200,
97
- length_function=len,
98
  )
99
 
100
  def parse_pdf(self, file_path: str) -> str:
101
- """Parse PDF file and extract text"""
102
  try:
103
  with open(file_path, 'rb') as file:
104
  pdf_reader = PyPDF2.PdfReader(file)
105
  text = ""
106
  for page in pdf_reader.pages:
107
- text += page.extract_text() + "\n"
108
  return text
109
  except Exception as e:
110
  logger.error(f"Error parsing PDF: {e}")
111
  return ""
112
 
113
- def parse_docx(self, file_path: str) -> str:
114
- """Parse DOCX file and extract text"""
115
- try:
116
- doc = Document(file_path)
117
- text = ""
118
- for paragraph in doc.paragraphs:
119
- text += paragraph.text + "\n"
120
- return text
121
- except Exception as e:
122
- logger.error(f"Error parsing DOCX: {e}")
123
- return ""
124
-
125
  def parse_pptx(self, file_path: str) -> str:
126
- """Parse PPTX file and extract text"""
127
  try:
128
  prs = Presentation(file_path)
129
  text = ""
130
- for slide_num, slide in enumerate(prs.slides, 1):
131
- text += f"Slide {slide_num}:\n"
132
  for shape in slide.shapes:
133
  if hasattr(shape, "text"):
134
  text += shape.text + "\n"
135
- text += "\n"
136
  return text
137
  except Exception as e:
138
  logger.error(f"Error parsing PPTX: {e}")
139
  return ""
140
 
141
  def parse_csv(self, file_path: str) -> str:
142
- """Parse CSV file and convert to text"""
143
  try:
144
  df = pd.read_csv(file_path)
145
  return df.to_string()
@@ -147,584 +135,415 @@ class IngestionAgent(BaseAgent):
147
  logger.error(f"Error parsing CSV: {e}")
148
  return ""
149
 
150
- def parse_txt_md(self, file_path: str) -> str:
151
- """Parse TXT or MD file"""
152
  try:
153
  with open(file_path, 'r', encoding='utf-8') as file:
154
- content = file.read()
155
- # If markdown, convert to plain text
156
- if file_path.lower().endswith('.md'):
157
- content = markdown.markdown(content)
158
- return content
159
  except Exception as e:
160
- logger.error(f"Error parsing TXT/MD: {e}")
161
  return ""
162
 
163
- async def process_documents(self, files: List[str], trace_id: str) -> List[LCDocument]:
164
- """Process uploaded documents and return chunked documents"""
165
- all_documents = []
 
166
 
167
  for file_path in files:
168
  file_ext = os.path.splitext(file_path)[1].lower()
169
- filename = os.path.basename(file_path)
170
 
171
- # Parse based on file extension
172
  if file_ext == '.pdf':
173
- content = self.parse_pdf(file_path)
174
- elif file_ext == '.docx':
175
- content = self.parse_docx(file_path)
176
  elif file_ext == '.pptx':
177
- content = self.parse_pptx(file_path)
178
  elif file_ext == '.csv':
179
- content = self.parse_csv(file_path)
180
  elif file_ext in ['.txt', '.md']:
181
- content = self.parse_txt_md(file_path)
182
  else:
183
  logger.warning(f"Unsupported file type: {file_ext}")
184
  continue
185
 
186
- if content.strip():
187
- # Split content into chunks
188
- chunks = self.text_splitter.split_text(content)
189
-
190
- # Create LangChain documents
191
- for i, chunk in enumerate(chunks):
192
- doc = LCDocument(
193
- page_content=chunk,
194
- metadata={
195
- "source": filename,
196
- "chunk_id": i,
197
- "file_type": file_ext
198
- }
199
- )
200
- all_documents.append(doc)
201
 
202
- return all_documents
203
-
204
- # Retrieval Agent
205
- class RetrievalAgent(BaseAgent):
206
- def __init__(self):
207
- super().__init__("RetrievalAgent")
208
- self.embeddings = HuggingFaceEmbeddings(
209
- model_name="sentence-transformers/all-MiniLM-L6-v2"
210
  )
211
  self.vector_store = None
212
 
213
- async def create_vector_store(self, documents: List[LCDocument], trace_id: str):
214
- """Create vector store from documents"""
215
- try:
216
- if documents:
217
- self.vector_store = FAISS.from_documents(documents, self.embeddings)
218
- logger.info(f"Created vector store with {len(documents)} documents")
219
- else:
220
- logger.warning("No documents to create vector store")
221
- except Exception as e:
222
- logger.error(f"Error creating vector store: {e}")
223
 
224
- async def retrieve_relevant_chunks(self, query: str, k: int = 5, trace_id: str = None) -> List[Dict]:
225
- """Retrieve relevant chunks for a query"""
226
- if not self.vector_store:
227
- return []
228
 
229
- try:
230
- # Similarity search
231
- docs = self.vector_store.similarity_search(query, k=k)
232
-
233
- # Format results
234
- results = []
235
- for doc in docs:
236
- results.append({
237
- "content": doc.page_content,
238
- "source": doc.metadata.get("source", "Unknown"),
239
- "chunk_id": doc.metadata.get("chunk_id", 0),
240
- "file_type": doc.metadata.get("file_type", "Unknown")
241
- })
242
-
243
- return results
244
- except Exception as e:
245
- logger.error(f"Error retrieving chunks: {e}")
246
- return []
247
 
248
- # LLM Response Agent
249
- class LLMResponseAgent(BaseAgent):
250
- def __init__(self):
251
- super().__init__("LLMResponseAgent")
252
- self.client = InferenceClient(
253
- model="meta-llama/Llama-3.1-8B",
254
- token=HF_TOKEN
255
- )
256
 
257
- def format_prompt(self, query: str, context_chunks: List[Dict]) -> str:
258
- """Format prompt with context and query"""
259
- context_text = "\n\n".join([
260
- f"Source: {chunk['source']}\nContent: {chunk['content']}"
261
- for chunk in context_chunks
262
- ])
263
 
264
- prompt = f"""Based on the following context from uploaded documents, please answer the user's question.
265
 
266
  Context:
267
  {context_text}
268
 
269
  Question: {query}
270
 
271
- Please provide a comprehensive answer based on the context above. If the context doesn't contain enough information to fully answer the question, please mention what information is available and what might be missing.
272
-
273
  Answer:"""
274
-
275
- return prompt
276
-
277
- async def generate_response(self, query: str, context_chunks: List[Dict], trace_id: str) -> str:
278
- """Generate response using LLM"""
279
  try:
280
- prompt = self.format_prompt(query, context_chunks)
281
-
282
- # Generate response using HuggingFace Inference
283
- response = self.client.text_generation(
284
  prompt,
285
  max_new_tokens=512,
286
  temperature=0.7,
287
- do_sample=True,
288
- return_full_text=False
289
  )
290
 
291
- return response
292
  except Exception as e:
293
  logger.error(f"Error generating response: {e}")
294
- return f"I apologize, but I encountered an error while generating the response: {str(e)}"
295
 
296
- # Coordinator Agent
297
- class CoordinatorAgent(BaseAgent):
298
- def __init__(self):
299
- super().__init__("CoordinatorAgent")
300
- self.ingestion_agent = IngestionAgent()
301
- self.retrieval_agent = RetrievalAgent()
302
- self.llm_agent = LLMResponseAgent()
303
- self.documents_processed = False
304
 
305
- async def process_documents(self, files: List[str]) -> str:
306
- """Orchestrate document processing"""
307
- trace_id = str(uuid.uuid4())
308
-
309
- try:
310
- # Step 1: Ingestion
311
- await self.send_mcp_message(
312
- "IngestionAgent",
313
- "DOCUMENT_INGESTION_REQUEST",
314
- {"files": files},
315
- trace_id
316
- )
317
-
318
- documents = await self.ingestion_agent.process_documents(files, trace_id)
319
-
320
- await self.send_mcp_message(
321
- "RetrievalAgent",
322
- "VECTOR_STORE_CREATE_REQUEST",
323
- {"documents": len(documents)},
324
- trace_id
325
- )
326
-
327
- # Step 2: Create vector store
328
- await self.retrieval_agent.create_vector_store(documents, trace_id)
329
-
330
- self.documents_processed = True
331
-
332
- return f"Successfully processed {len(documents)} document chunks from {len(files)} files."
333
-
334
- except Exception as e:
335
- logger.error(f"Error in document processing: {e}")
336
- return f"Error processing documents: {str(e)}"
337
 
338
- async def answer_query(self, query: str) -> tuple[str, List[Dict]]:
339
- """Orchestrate query answering"""
340
- if not self.documents_processed:
341
- return "Please upload and process documents first.", []
342
 
343
- trace_id = str(uuid.uuid4())
344
 
345
- try:
346
- # Step 1: Retrieval
347
- await self.send_mcp_message(
348
- "RetrievalAgent",
349
- "RETRIEVAL_REQUEST",
350
- {"query": query},
351
- trace_id
352
- )
353
-
354
- context_chunks = await self.retrieval_agent.retrieve_relevant_chunks(query, k=5, trace_id=trace_id)
355
-
356
- # Step 2: LLM Response
357
- await self.send_mcp_message(
358
- "LLMResponseAgent",
359
- "LLM_GENERATION_REQUEST",
360
- {"query": query, "context_chunks": len(context_chunks)},
361
- trace_id
362
- )
363
-
364
- response = await self.llm_agent.generate_response(query, context_chunks, trace_id)
365
-
366
- return response, context_chunks
367
-
368
- except Exception as e:
369
- logger.error(f"Error in query processing: {e}")
370
- return f"Error processing query: {str(e)}", []
371
-
372
- # Global coordinator instance
373
- coordinator = CoordinatorAgent()
374
-
375
- async def process_files(files):
376
- """Process uploaded files"""
377
- if not files:
378
- return "❌ Please upload at least one file."
379
 
380
- # Save uploaded files to temporary directory
381
- file_paths = []
382
- for file in files:
383
- # Handle file path - Gradio returns file path as string
384
- if hasattr(file, 'name'):
385
- file_path = file.name
386
  else:
387
- file_path = str(file)
388
- file_paths.append(file_path)
389
-
390
- result = await coordinator.process_documents(file_paths)
391
-
392
- return result
393
-
394
- async def answer_question(query, history):
395
- """Answer user question"""
396
- if not query.strip():
397
- return history, ""
398
-
399
- response, context_chunks = await coordinator.answer_query(query)
400
-
401
- # Format response with sources
402
- if context_chunks:
403
- sources = "\n\n**Sources:**\n"
404
- for i, chunk in enumerate(context_chunks[:3], 1): # Show top 3 sources
405
- sources += f"{i}. {chunk['source']} (Chunk {chunk['chunk_id']})\n"
406
- response += sources
407
-
408
- # Add to chat history
409
- history.append((query, response))
410
-
411
- return history, ""
412
-
413
- # Custom CSS
414
- custom_css = """
415
- /* Main container styling */
416
- .gradio-container {
417
- max-width: 1200px !important;
418
- margin: 0 auto !important;
419
- font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif !important;
420
- }
421
-
422
- /* Header styling */
423
- .header-container {
424
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
425
- color: white !important;
426
- padding: 2rem !important;
427
- border-radius: 15px !important;
428
- margin-bottom: 2rem !important;
429
- text-align: center !important;
430
- box-shadow: 0 8px 32px rgba(0,0,0,0.1) !important;
431
- }
432
-
433
- .header-title {
434
- font-size: 2.5rem !important;
435
- font-weight: 700 !important;
436
- margin-bottom: 0.5rem !important;
437
- text-shadow: 2px 2px 4px rgba(0,0,0,0.3) !important;
438
- }
439
-
440
- .header-subtitle {
441
- font-size: 1.2rem !important;
442
- opacity: 0.9 !important;
443
- font-weight: 300 !important;
444
- }
445
-
446
- /* Tab styling */
447
- .tab-nav {
448
- background: white !important;
449
- border-radius: 12px !important;
450
- box-shadow: 0 4px 20px rgba(0,0,0,0.08) !important;
451
- padding: 0.5rem !important;
452
- margin-bottom: 1rem !important;
453
- }
454
-
455
- /* Card styling */
456
- .setup-card, .upload-card, .chat-card {
457
- background: white !important;
458
- border-radius: 15px !important;
459
- padding: 2rem !important;
460
- box-shadow: 0 4px 20px rgba(0,0,0,0.08) !important;
461
- border: 1px solid #e1e5e9 !important;
462
- margin-bottom: 1.5rem !important;
463
- }
464
-
465
- /* Button styling */
466
- .primary-button {
467
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
468
- color: white !important;
469
- border: none !important;
470
- border-radius: 10px !important;
471
- padding: 0.75rem 2rem !important;
472
- font-weight: 600 !important;
473
- transition: all 0.3s ease !important;
474
- box-shadow: 0 4px 15px rgba(102, 126, 234, 0.3) !important;
475
- }
476
-
477
- .primary-button:hover {
478
- transform: translateY(-2px) !important;
479
- box-shadow: 0 6px 20px rgba(102, 126, 234, 0.4) !important;
480
- }
481
 
482
- /* Chat interface styling */
483
- .chat-container {
484
- max-height: 600px !important;
485
- overflow-y: auto !important;
486
- background: #f8f9fa !important;
487
- border-radius: 15px !important;
488
- padding: 1rem !important;
489
- border: 1px solid #e1e5e9 !important;
490
- }
491
 
492
- /* Input styling */
493
- .input-container input, .input-container textarea {
494
- border: 2px solid #e1e5e9 !important;
495
- border-radius: 10px !important;
496
- padding: 0.75rem 1rem !important;
497
- font-size: 1rem !important;
498
- transition: border-color 0.3s ease !important;
499
- }
500
-
501
- .input-container input:focus, .input-container textarea:focus {
502
- border-color: #667eea !important;
503
- box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;
504
- outline: none !important;
505
- }
506
-
507
- /* Status indicators */
508
- .status-success {
509
- color: #28a745 !important;
510
- background: #d4edda !important;
511
- padding: 0.75rem 1rem !important;
512
- border-radius: 8px !important;
513
- border: 1px solid #c3e6cb !important;
514
- margin: 1rem 0 !important;
515
- }
516
-
517
- .status-error {
518
- color: #dc3545 !important;
519
- background: #f8d7da !important;
520
- padding: 0.75rem 1rem !important;
521
- border-radius: 8px !important;
522
- border: 1px solid #f5c6cb !important;
523
- margin: 1rem 0 !important;
524
- }
525
-
526
- /* File upload styling */
527
- .file-upload {
528
- border: 2px dashed #667eea !important;
529
- border-radius: 15px !important;
530
- padding: 2rem !important;
531
- text-align: center !important;
532
- background: #f8f9ff !important;
533
- transition: all 0.3s ease !important;
534
- }
535
-
536
- .file-upload:hover {
537
- border-color: #764ba2 !important;
538
- background: #f0f4ff !important;
539
- }
540
-
541
- /* Architecture diagram container */
542
- .architecture-container {
543
- background: white !important;
544
- border-radius: 15px !important;
545
- padding: 2rem !important;
546
- margin: 1rem 0 !important;
547
- box-shadow: 0 4px 20px rgba(0,0,0,0.08) !important;
548
- text-align: center !important;
549
- }
550
-
551
- /* Responsive design */
552
- @media (max-width: 768px) {
553
- .header-title {
554
- font-size: 2rem !important;
555
- }
556
-
557
- .setup-card, .upload-card, .chat-card {
558
- padding: 1.5rem !important;
559
- }
560
- }
561
-
562
- /* Animation for loading states */
563
- @keyframes pulse {
564
- 0% { opacity: 1; }
565
- 50% { opacity: 0.5; }
566
- 100% { opacity: 1; }
567
- }
568
-
569
- .loading {
570
- animation: pulse 1.5s ease-in-out infinite !important;
571
- }
572
- """
573
-
574
- # Create Gradio Interface
575
  def create_interface():
576
- with gr.Blocks(css=custom_css, title="🤖 Agentic RAG Chatbot") as demo:
577
  gr.HTML("""
578
- <div class="header-container">
579
- <h1 class="header-title">🤖 Agentic RAG Chatbot</h1>
580
- <p class="header-subtitle">Multi-Format Document QA using Model Context Protocol (MCP)</p>
581
- </div>
582
  """)
583
 
584
- with gr.Tabs() as tabs:
585
- # Upload Tab
586
- with gr.TabItem("📁 Upload Documents", elem_classes=["tab-nav"]):
587
- gr.HTML("""
588
- <div class="upload-card">
589
- <h3>📄 Document Upload</h3>
590
- <p>Upload your documents in any supported format: PDF, DOCX, PPTX, CSV, TXT, or Markdown.</p>
591
- </div>
592
- """)
593
 
594
  file_upload = gr.File(
595
- label="Choose Files",
596
  file_count="multiple",
597
- file_types=[".pdf", ".docx", ".pptx", ".csv", ".txt", ".md"],
598
- elem_classes=["file-upload"]
599
- )
600
-
601
- upload_button = gr.Button(
602
- "Process Documents",
603
- variant="primary",
604
- elem_classes=["primary-button"]
605
  )
606
 
607
  upload_status = gr.Textbox(
608
- label="Processing Status",
609
  interactive=False,
610
- elem_classes=["input-container"]
611
  )
612
-
613
- # Chat Tab
614
- with gr.TabItem("💬 Chat", elem_classes=["tab-nav"]):
615
- gr.HTML("""
616
- <div class="chat-card">
617
- <h3>🗨️ Ask Questions</h3>
618
- <p>Ask questions about your uploaded documents. The AI will provide answers based on the document content.</p>
619
- </div>
620
  """)
621
 
622
  chatbot = gr.Chatbot(
623
- label="Conversation",
624
- height=400,
625
- elem_classes=["chat-container"]
 
626
  )
627
 
628
  with gr.Row():
629
- query_input = gr.Textbox(
630
- label="Your Question",
631
- placeholder="What are the key findings in the document?",
632
- elem_classes=["input-container"]
633
- )
634
- ask_button = gr.Button(
635
- "Ask",
636
- variant="primary",
637
- elem_classes=["primary-button"]
638
  )
 
639
 
640
  gr.Examples(
641
  examples=[
642
- "What are the main topics covered in the documents?",
643
  "Can you summarize the key findings?",
644
- "What are the important metrics mentioned?",
645
  "What recommendations are provided?",
 
646
  ],
647
- inputs=query_input,
648
- label="Example Questions"
649
  )
650
-
651
- # Architecture Tab
652
- with gr.TabItem("🏗️ Architecture", elem_classes=["tab-nav"]):
653
- gr.HTML("""
654
- <div class="architecture-container">
655
- <h3>🏛️ System Architecture</h3>
656
- <p>This system uses an agentic architecture with Model Context Protocol (MCP) for inter-agent communication.</p>
657
- </div>
658
- """)
659
-
660
- gr.Markdown("""
661
- ## 🔄 Agent Flow Diagram
662
-
663
- ```
664
- User Upload → CoordinatorAgent → IngestionAgent → RetrievalAgent → LLMResponseAgent
665
- ↓ ↓ ↓ ↓ ↓
666
- Documents MCP Messages Text Chunks Vector Store Final Response
667
- ```
668
-
669
- ## 🤖 Agent Descriptions
670
-
671
- - **CoordinatorAgent**: Orchestrates the entire workflow and manages MCP communication
672
- - **IngestionAgent**: Parses and preprocesses documents (PDF, DOCX, PPTX, CSV, TXT, MD)
673
- - **RetrievalAgent**: Handles embeddings and semantic retrieval using FAISS
674
- - **LLMResponseAgent**: Generates final responses using Llama-3.1-8B-Instruct
675
-
676
- ## 🔗 Tech Stack
677
-
678
- - **Frontend**: Gradio with custom CSS
679
- - **LLM**: Meta Llama-3.1-8B-Instruct (via HuggingFace Inference)
680
- - **Embeddings**: sentence-transformers/all-MiniLM-L6-v2
681
- - **Vector Store**: FAISS
682
- - **Document Processing**: PyPDF2, python-docx, python-pptx, pandas
683
- - **Framework**: LangChain for document handling
684
-
685
- ## 📨 MCP Message Example
686
-
687
- ```json
688
- {
689
- "sender": "RetrievalAgent",
690
- "receiver": "LLMResponseAgent",
691
- "type": "RETRIEVAL_RESULT",
692
- "trace_id": "rag-457",
693
- "payload": {
694
- "retrieved_context": ["Revenue increased by 25%", "Q1 KPIs exceeded targets"],
695
- "query": "What were the Q1 KPIs?"
696
- },
697
- "timestamp": "2025-07-21T10:30:00Z"
698
- }
699
- ```
700
- """)
701
 
702
  # Event handlers
703
- upload_button.click(
704
- fn=process_files,
705
  inputs=[file_upload],
706
  outputs=[upload_status]
707
  )
708
 
709
- ask_button.click(
710
- fn=answer_question,
711
- inputs=[query_input, chatbot],
712
- outputs=[chatbot, query_input]
 
713
  )
714
 
715
- query_input.submit(
716
- fn=answer_question,
717
- inputs=[query_input, chatbot],
718
- outputs=[chatbot, query_input]
 
719
  )
720
 
721
- return demo
722
 
 
723
  if __name__ == "__main__":
724
  demo = create_interface()
725
  demo.launch(
726
  share=True,
727
  server_name="0.0.0.0",
728
- server_port=7860,
729
- show_api=False
730
  )
 
1
  import gradio as gr
2
  import os
3
  import json
4
+ import uuid
5
  import asyncio
6
+ from datetime import datetime
7
+ from typing import List, Dict, Any, Optional, Generator
8
  import logging
9
 
10
+ # Import required libraries
11
+ from huggingface_hub import InferenceClient
12
  from langchain.text_splitter import RecursiveCharacterTextSplitter
13
  from langchain.embeddings import HuggingFaceEmbeddings
14
  from langchain.vectorstores import FAISS
15
+ from langchain.docstore.document import Document
16
+
17
+ # Import document parsers
18
+ import PyPDF2
19
+ from pptx import Presentation
20
+ import pandas as pd
21
+ from docx import Document as DocxDocument
22
+ import io
23
 
24
+ # Configure logging
25
  logging.basicConfig(level=logging.INFO)
26
  logger = logging.getLogger(__name__)
27
 
28
+ # Get HuggingFace token from environment
29
+ HF_TOKEN = os.getenv("hf_token")
30
+ if not HF_TOKEN:
31
+ raise ValueError("HuggingFace token not found in environment variables")
32
+
33
+ # Initialize HuggingFace Inference Client
34
+ client = InferenceClient(model="meta-llama/Llama-3.1-8B-Instruct", token=HF_TOKEN)
35
+
36
+ # Initialize embeddings
37
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
38
 
39
  class MCPMessage:
40
+ """Model Context Protocol Message Structure"""
41
+ def __init__(self, sender: str, receiver: str, msg_type: str,
42
+ trace_id: str = None, payload: Dict = None):
43
+ self.sender = sender
44
+ self.receiver = receiver
45
+ self.type = msg_type
46
+ self.trace_id = trace_id or str(uuid.uuid4())
47
+ self.payload = payload or {}
48
+ self.timestamp = datetime.now().isoformat()
 
49
 
50
  def to_dict(self):
51
+ return {
52
+ "sender": self.sender,
53
+ "receiver": self.receiver,
54
+ "type": self.type,
55
+ "trace_id": self.trace_id,
56
+ "payload": self.payload,
57
+ "timestamp": self.timestamp
58
+ }
59
+
60
+ class MessageBus:
61
+ """In-memory message bus for MCP communication"""
62
  def __init__(self):
63
+ self.messages = []
64
  self.subscribers = {}
65
 
66
+ def publish(self, message: MCPMessage):
67
+ """Publish message to the bus"""
68
+ self.messages.append(message)
69
+ logger.info(f"Message published: {message.sender} -> {message.receiver} [{message.type}]")
70
+
71
+ # Notify subscribers
72
+ if message.receiver in self.subscribers:
73
+ for callback in self.subscribers[message.receiver]:
74
+ callback(message)
75
 
76
+ def subscribe(self, agent_name: str, callback):
77
+ """Subscribe agent to receive messages"""
78
+ if agent_name not in self.subscribers:
79
+ self.subscribers[agent_name] = []
80
+ self.subscribers[agent_name].append(callback)
81
 
82
+ # Global message bus
83
+ message_bus = MessageBus()
84
 
85
+ class IngestionAgent:
86
+ """Agent responsible for document parsing and preprocessing"""
87
 
88
+ def __init__(self, message_bus: MessageBus):
89
+ self.name = "IngestionAgent"
90
+ self.message_bus = message_bus
91
+ self.message_bus.subscribe(self.name, self.handle_message)
92
  self.text_splitter = RecursiveCharacterTextSplitter(
93
  chunk_size=1000,
94
+ chunk_overlap=200
 
95
  )
96
 
97
+ def handle_message(self, message: MCPMessage):
98
+ """Handle incoming MCP messages"""
99
+ if message.type == "INGESTION_REQUEST":
100
+ self.process_documents(message)
101
+
102
  def parse_pdf(self, file_path: str) -> str:
103
+ """Parse PDF document"""
104
  try:
105
  with open(file_path, 'rb') as file:
106
  pdf_reader = PyPDF2.PdfReader(file)
107
  text = ""
108
  for page in pdf_reader.pages:
109
+ text += page.extract_text()
110
  return text
111
  except Exception as e:
112
  logger.error(f"Error parsing PDF: {e}")
113
  return ""
114
 
115
  def parse_pptx(self, file_path: str) -> str:
116
+ """Parse PPTX document"""
117
  try:
118
  prs = Presentation(file_path)
119
  text = ""
120
+ for slide in prs.slides:
 
121
  for shape in slide.shapes:
122
  if hasattr(shape, "text"):
123
  text += shape.text + "\n"
 
124
  return text
125
  except Exception as e:
126
  logger.error(f"Error parsing PPTX: {e}")
127
  return ""
128
 
129
  def parse_csv(self, file_path: str) -> str:
130
+ """Parse CSV document"""
131
  try:
132
  df = pd.read_csv(file_path)
133
  return df.to_string()
134
  except Exception as e:
135
  logger.error(f"Error parsing CSV: {e}")
136
  return ""
137
 
138
+ def parse_docx(self, file_path: str) -> str:
139
+ """Parse DOCX document"""
140
+ try:
141
+ doc = DocxDocument(file_path)
142
+ text = ""
143
+ for paragraph in doc.paragraphs:
144
+ text += paragraph.text + "\n"
145
+ return text
146
+ except Exception as e:
147
+ logger.error(f"Error parsing DOCX: {e}")
148
+ return ""
149
+
150
+ def parse_txt(self, file_path: str) -> str:
151
+ """Parse TXT/Markdown document"""
152
  try:
153
  with open(file_path, 'r', encoding='utf-8') as file:
154
+ return file.read()
155
  except Exception as e:
156
+ logger.error(f"Error parsing TXT: {e}")
157
  return ""
158
 
159
+ def process_documents(self, message: MCPMessage):
160
+ """Process uploaded documents"""
161
+ files = message.payload.get("files", [])
162
+ processed_docs = []
163
 
164
  for file_path in files:
165
  file_ext = os.path.splitext(file_path)[1].lower()
 
166
 
167
+ # Parse document based on file type
168
  if file_ext == '.pdf':
169
+ text = self.parse_pdf(file_path)
170
  elif file_ext == '.pptx':
171
+ text = self.parse_pptx(file_path)
172
  elif file_ext == '.csv':
173
+ text = self.parse_csv(file_path)
174
+ elif file_ext == '.docx':
175
+ text = self.parse_docx(file_path)
176
  elif file_ext in ['.txt', '.md']:
177
+ text = self.parse_txt(file_path)
178
  else:
179
  logger.warning(f"Unsupported file type: {file_ext}")
180
  continue
181
 
182
+ if text:
183
+ # Split text into chunks
184
+ chunks = self.text_splitter.split_text(text)
185
+ docs = [Document(page_content=chunk, metadata={"source": file_path})
186
+ for chunk in chunks]
187
+ processed_docs.extend(docs)
188
 
189
+ # Send processed documents to RetrievalAgent
190
+ response = MCPMessage(
191
+ sender=self.name,
192
+ receiver="RetrievalAgent",
193
+ msg_type="INGESTION_COMPLETE",
194
+ trace_id=message.trace_id,
195
+ payload={"documents": processed_docs}
 
196
  )
197
+ self.message_bus.publish(response)
198
+
199
+ class RetrievalAgent:
200
+ """Agent responsible for embedding and semantic retrieval"""
201
+
202
+ def __init__(self, message_bus: MessageBus):
203
+ self.name = "RetrievalAgent"
204
+ self.message_bus = message_bus
205
+ self.message_bus.subscribe(self.name, self.handle_message)
206
  self.vector_store = None
207
 
208
+ def handle_message(self, message: MCPMessage):
209
+ """Handle incoming MCP messages"""
210
+ if message.type == "INGESTION_COMPLETE":
211
+ self.create_vector_store(message)
212
+ elif message.type == "RETRIEVAL_REQUEST":
213
+ self.retrieve_context(message)
214
 
215
+ def create_vector_store(self, message: MCPMessage):
216
+ """Create vector store from processed documents"""
217
+ documents = message.payload.get("documents", [])
 
218
 
219
+ if documents:
220
+ try:
221
+ self.vector_store = FAISS.from_documents(documents, embeddings)
222
+ logger.info(f"Vector store created with {len(documents)} documents")
223
+
224
+ # Notify completion
225
+ response = MCPMessage(
226
+ sender=self.name,
227
+ receiver="CoordinatorAgent",
228
+ msg_type="VECTORSTORE_READY",
229
+ trace_id=message.trace_id,
230
+ payload={"status": "ready"}
231
+ )
232
+ self.message_bus.publish(response)
233
+ except Exception as e:
234
+ logger.error(f"Error creating vector store: {e}")
235
+
236
+ def retrieve_context(self, message: MCPMessage):
237
+ """Retrieve relevant context for a query"""
238
+ query = message.payload.get("query", "")
239
+ k = message.payload.get("k", 3)
240
+
241
+ if self.vector_store and query:
242
+ try:
243
+ docs = self.vector_store.similarity_search(query, k=k)
244
+ context = [{"content": doc.page_content, "source": doc.metadata.get("source", "")}
245
+ for doc in docs]
246
+
247
+ response = MCPMessage(
248
+ sender=self.name,
249
+ receiver="LLMResponseAgent",
250
+ msg_type="CONTEXT_RESPONSE",
251
+ trace_id=message.trace_id,
252
+ payload={
253
+ "query": query,
254
+ "retrieved_context": context,
255
+ "top_chunks": [doc.page_content for doc in docs]
256
+ }
257
+ )
258
+ self.message_bus.publish(response)
259
+ except Exception as e:
260
+ logger.error(f"Error retrieving context: {e}")
261
 
262
+ class LLMResponseAgent:
263
+ """Agent responsible for generating LLM responses"""
264
+
265
+ def __init__(self, message_bus: MessageBus):
266
+ self.name = "LLMResponseAgent"
267
+ self.message_bus = message_bus
268
+ self.message_bus.subscribe(self.name, self.handle_message)
269
+
270
+ def handle_message(self, message: MCPMessage):
271
+ """Handle incoming MCP messages"""
272
+ if message.type == "CONTEXT_RESPONSE":
273
+ self.generate_response(message)
274
 
275
+ def generate_response(self, message: MCPMessage):
276
+ """Generate response using retrieved context"""
277
+ query = message.payload.get("query", "")
278
+ context = message.payload.get("retrieved_context", [])
279
 
280
+ # Build prompt with context
281
+ context_text = "\n\n".join([f"Source: {ctx['source']}\nContent: {ctx['content']}"
282
+ for ctx in context])
283
+
284
+ prompt = f"""Based on the following context, please answer the user's question accurately and comprehensively.
285
 
286
  Context:
287
  {context_text}
288
 
289
  Question: {query}
290
 
291
  Answer:"""
292
+
293
  try:
294
+ # Generate streaming response
295
+ response_stream = client.text_generation(
296
  prompt,
297
  max_new_tokens=512,
298
  temperature=0.7,
299
+ stream=True
 
300
  )
301
 
302
+ # Send streaming response
303
+ response = MCPMessage(
304
+ sender=self.name,
305
+ receiver="CoordinatorAgent",
306
+ msg_type="LLM_RESPONSE_STREAM",
307
+ trace_id=message.trace_id,
308
+ payload={
309
+ "query": query,
310
+ "response_stream": response_stream,
311
+ "context": context
312
+ }
313
+ )
314
+ self.message_bus.publish(response)
315
+
316
  except Exception as e:
317
  logger.error(f"Error generating response: {e}")
 
318
 
319
+ class CoordinatorAgent:
320
+ """Coordinator agent that orchestrates the entire workflow"""
321
 
322
+ def __init__(self, message_bus: MessageBus):
323
+ self.name = "CoordinatorAgent"
324
+ self.message_bus = message_bus
325
+ self.message_bus.subscribe(self.name, self.handle_message)
326
+ self.current_response_stream = None
327
+ self.vector_store_ready = False
328
 
329
+ def handle_message(self, message: MCPMessage):
330
+ """Handle incoming MCP messages"""
331
+ if message.type == "VECTORSTORE_READY":
332
+ self.vector_store_ready = True
333
+ elif message.type == "LLM_RESPONSE_STREAM":
334
+ self.current_response_stream = message.payload.get("response_stream")
335
+
336
+ def process_files(self, files):
337
+ """Process uploaded files"""
338
+ if not files:
339
+ return "No files uploaded."
340
 
341
+ file_paths = [file.name for file in files]
342
 
343
+ # Send ingestion request
344
+ message = MCPMessage(
345
+ sender=self.name,
346
+ receiver="IngestionAgent",
347
+ msg_type="INGESTION_REQUEST",
348
+ payload={"files": file_paths}
349
+ )
350
+ self.message_bus.publish(message)
351
+
352
+ return f"Processing {len(files)} files: {', '.join([os.path.basename(fp) for fp in file_paths])}"
353
 
354
+ def handle_query(self, query: str, history: List):
355
+ """Handle user query and return streaming response"""
356
+ if not self.vector_store_ready:
357
+ yield "Please upload and process documents first."
358
+ return
359
+
360
+ # Send retrieval request
361
+ message = MCPMessage(
362
+ sender=self.name,
363
+ receiver="RetrievalAgent",
364
+ msg_type="RETRIEVAL_REQUEST",
365
+ payload={"query": query}
366
+ )
367
+ self.message_bus.publish(message)
368
+
369
+ # Wait for response and stream
370
+ import time
371
+ timeout = 10 # seconds
372
+ start_time = time.time()
373
+
374
+ while not self.current_response_stream and (time.time() - start_time) < timeout:
375
+ time.sleep(0.1)
376
+
377
+ if self.current_response_stream:
378
+ partial_response = ""
379
+ try:
380
+ for token in self.current_response_stream:
381
+ if token:
382
+ partial_response += token
383
+ yield partial_response
384
+ time.sleep(0.05) # Simulate streaming delay
385
+ except Exception as e:
386
+ yield f"Error generating response: {e}"
387
+ finally:
388
+ self.current_response_stream = None
389
  else:
390
+ yield "Timeout: No response received from LLM agent."
391
 
392
+ # Initialize agents
393
+ ingestion_agent = IngestionAgent(message_bus)
394
+ retrieval_agent = RetrievalAgent(message_bus)
395
+ llm_response_agent = LLMResponseAgent(message_bus)
396
+ coordinator_agent = CoordinatorAgent(message_bus)
397
 
398
+ # Gradio Interface
399
  def create_interface():
400
+ """Create Gradio interface"""
401
+
402
+ with gr.Blocks(
403
+ theme=gr.themes.Soft(primary_hue="blue", secondary_hue="purple"),
404
+ css="""
405
+ .gradio-container {
406
+ max-width: 1200px !important;
407
+ }
408
+ .header-text {
409
+ text-align: center;
410
+ color: #667eea;
411
+ font-size: 2.5em;
412
+ font-weight: bold;
413
+ margin-bottom: 10px;
414
+ }
415
+ .subheader-text {
416
+ text-align: center;
417
+ color: #666;
418
+ font-size: 1.2em;
419
+ margin-bottom: 20px;
420
+ }
421
+ .upload-section {
422
+ border: 2px dashed #667eea;
423
+ border-radius: 10px;
424
+ padding: 20px;
425
+ margin: 10px 0;
426
+ }
427
+ .chat-container {
428
+ height: 500px;
429
+ }
430
+ """,
431
+ title="🤖 Agentic RAG Chatbot"
432
+ ) as iface:
433
+
434
+ # Header
435
  gr.HTML("""
436
+ <div class="header-text">🤖 Agentic RAG Chatbot</div>
437
+ <div class="subheader-text">Multi-Format Document QA with Model Context Protocol (MCP)</div>
438
  """)
439
 
440
+ with gr.Row():
441
+ with gr.Column(scale=1):
442
+ gr.Markdown("## 📁 Document Upload")
443
 
444
  file_upload = gr.File(
 
445
  file_count="multiple",
446
+ file_types=[".pdf", ".pptx", ".csv", ".docx", ".txt", ".md"],
447
+ label="Upload Documents (PDF, PPTX, CSV, DOCX, TXT, MD)",
448
+ elem_classes=["upload-section"]
449
  )
450
 
451
  upload_status = gr.Textbox(
452
+ label="Upload Status",
453
  interactive=False,
454
+ max_lines=3
455
  )
456
+
457
+ process_btn = gr.Button(
458
+ "🔄 Process Documents",
459
+ variant="primary",
460
+ size="lg"
461
+ )
462
+
463
+ gr.Markdown("## 🏗️ Architecture Info")
464
+ gr.Markdown("""
465
+ **Agents:**
466
+ - 🔄 IngestionAgent: Document parsing
467
+ - 🔍 RetrievalAgent: Semantic search
468
+ - 🤖 LLMResponseAgent: Response generation
469
+ - 🎯 CoordinatorAgent: Workflow orchestration
470
+
471
+ **MCP Communication:** Structured message passing between agents
472
  """)
473
+
474
+ with gr.Column(scale=2):
475
+ gr.Markdown("## 💬 Chat Interface")
476
 
477
  chatbot = gr.Chatbot(
478
+ height=500,
479
+ elem_classes=["chat-container"],
480
+ show_copy_button=True,
481
+ bubble_full_width=False
482
  )
483
 
484
  with gr.Row():
485
+ msg = gr.Textbox(
486
+ label="Ask a question about your documents...",
487
+ placeholder="What are the key findings in the uploaded documents?",
488
+ scale=4,
489
+ submit=True
490
  )
491
+ submit_btn = gr.Button("Send 🚀", scale=1, variant="primary")
492
 
493
  gr.Examples(
494
  examples=[
495
+ "What are the main topics discussed in the documents?",
496
  "Can you summarize the key findings?",
497
+ "What metrics or KPIs are mentioned?",
498
  "What recommendations are provided?",
499
+ "Are there any trends or patterns identified?"
500
  ],
501
+ inputs=msg
 
502
  )
503
 
504
  # Event handlers
505
+ def process_files_handler(files):
506
+ return coordinator_agent.process_files(files)
507
+
508
+ def respond(message, history):
509
+ if message.strip():
510
+ # Add user message to history
511
+ history.append([message, ""])
512
+
513
+ # Get streaming response
514
+ for response in coordinator_agent.handle_query(message, history):
515
+ history[-1][1] = response
516
+ yield history, ""
517
+ else:
518
+ yield history, message
519
+
520
+ process_btn.click(
521
+ process_files_handler,
522
  inputs=[file_upload],
523
  outputs=[upload_status]
524
  )
525
 
526
+ submit_btn.click(
527
+ respond,
528
+ inputs=[msg, chatbot],
529
+ outputs=[chatbot, msg],
530
+ show_progress=True
531
  )
532
 
533
+ msg.submit(
534
+ respond,
535
+ inputs=[msg, chatbot],
536
+ outputs=[chatbot, msg],
537
+ show_progress=True
538
  )
539
 
540
+ return iface
541
 
542
+ # Launch the application
543
  if __name__ == "__main__":
544
  demo = create_interface()
545
  demo.launch(
546
  share=True,
547
  server_name="0.0.0.0",
548
+ server_port=7860
 
549
  )
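
For readers skimming the diff: the new app.py replaces the old asyncio-queue `MCPCommunicator` with a synchronous publish/subscribe `MessageBus`. Below is a minimal, self-contained sketch of that message flow; the toy classes mirror the `MCPMessage` and `MessageBus` definitions added in this commit but are simplified stand-ins, not imports from app.py.

```python
# Standalone sketch of the publish/subscribe flow introduced in this commit.
# These are simplified stand-ins for the MCPMessage/MessageBus classes in app.py.
import uuid
from datetime import datetime
from typing import Callable, Dict, List


class MCPMessage:
    def __init__(self, sender: str, receiver: str, msg_type: str, payload: Dict = None):
        self.sender = sender
        self.receiver = receiver
        self.type = msg_type
        self.trace_id = str(uuid.uuid4())
        self.payload = payload or {}
        self.timestamp = datetime.now().isoformat()


class MessageBus:
    def __init__(self):
        self.subscribers: Dict[str, List[Callable[[MCPMessage], None]]] = {}

    def subscribe(self, agent_name: str, callback: Callable[[MCPMessage], None]):
        self.subscribers.setdefault(agent_name, []).append(callback)

    def publish(self, message: MCPMessage):
        # Deliver synchronously to every callback registered for the receiver.
        for callback in self.subscribers.get(message.receiver, []):
            callback(message)


bus = MessageBus()

# A toy "RetrievalAgent" handler standing in for RetrievalAgent.handle_message.
def retrieval_handler(message: MCPMessage):
    print(f"[{message.receiver}] {message.type} (trace {message.trace_id[:8]}): "
          f"{message.payload.get('query')}")

bus.subscribe("RetrievalAgent", retrieval_handler)
bus.publish(MCPMessage("CoordinatorAgent", "RetrievalAgent", "RETRIEVAL_REQUEST",
                       payload={"query": "What were the Q1 KPIs?"}))
```

Note that delivery is synchronous: `publish()` invokes the subscriber callbacks inline, which is why the committed `CoordinatorAgent.handle_query` polls `self.current_response_stream` with a timeout rather than awaiting a queue as the old version did.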