Update app.py
app.py CHANGED
@@ -38,7 +38,7 @@ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-
 
 class MCPMessage:
     """Model Context Protocol Message Structure"""
-    def __init__(self, sender: str, receiver: str, msg_type: str,
+    def __init__(self, sender: str, receiver: str, msg_type: str,
                  trace_id: str = None, payload: Dict = None):
         self.sender = sender
         self.receiver = receiver
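As an illustration of the message shape, here is a hypothetical MCPMessage construction; the field values are modeled on the LLM_RESPONSE_STREAM message added later in this diff, and the module-level message_bus object is an assumption about app.py's internals:

# Hypothetical usage of MCPMessage; field names come from the diff, while the
# publish() call mirrors the self.message_bus.publish(...) pattern used below.
msg = MCPMessage(
    sender="RetrievalAgent",
    receiver="LLMResponseAgent",
    msg_type="RETRIEVAL_RESULT",
    trace_id="trace-001",
    payload={"query": "What are the key findings?", "retrieved_context": []},
)
message_bus.publish(msg)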
@@ -183,7 +183,7 @@ class IngestionAgent:
                 # Split text into chunks
                 chunks = self.text_splitter.split_text(text)
                 docs = [Document(page_content=chunk, metadata={"source": file_path})
-
+                        for chunk in chunks]
                 processed_docs.extend(docs)
 
                 # Send processed documents to RetrievalAgent
@@ -242,7 +242,7 @@ class RetrievalAgent:
         try:
             docs = self.vector_store.similarity_search(query, k=k)
             context = [{"content": doc.page_content, "source": doc.metadata.get("source", "")}
-
+                       for doc in docs]
 
             response = MCPMessage(
                 sender=self.name,
@@ -277,24 +277,27 @@ class LLMResponseAgent:
         query = message.payload.get("query", "")
         context = message.payload.get("retrieved_context", [])
 
-        # Build
+        # Build context string
         context_text = "\n\n".join([f"Source: {ctx['source']}\nContent: {ctx['content']}"
-
+                                    for ctx in context])
 
-
-
-
-
-
-
-
-
+        # Create messages for conversational format
+        messages = [
+            {
+                "role": "system",
+                "content": "You are a helpful assistant. Based on the provided context below, answer the user's question accurately and comprehensively. Cite the sources if possible.",
+            },
+            {
+                "role": "user",
+                "content": f"Context:\n\n{context_text}\n\nQuestion: {query}"
+            }
+        ]
 
         try:
-            #
-            response_stream = client.
-
-
+            # Use client.chat_completion for conversational models
+            response_stream = client.chat_completion(
+                messages=messages,
+                max_tokens=512,
                 temperature=0.7,
                 stream=True
             )
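As a standalone illustration of the new call, a minimal sketch of streaming with huggingface_hub's InferenceClient.chat_completion; the model id below is a placeholder, and app.py constructs its own client outside this hunk:

from huggingface_hub import InferenceClient

# Placeholder model id; not the model configured in app.py.
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Context:\n\n...\n\nQuestion: ..."},
]

# With stream=True the call returns an iterator of chunks; the incremental
# text is in chunk.choices[0].delta.content (it can be None on some chunks).
for chunk in client.chat_completion(messages=messages, max_tokens=512,
                                    temperature=0.7, stream=True):
    token = chunk.choices[0].delta.content
    if token:
        print(token, end="", flush=True)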
@@ -315,6 +318,19 @@ Answer:"""
 
         except Exception as e:
             logger.error(f"Error generating response: {e}")
+            # Send an error stream back
+            error_msg = f"Error from LLM: {e}"
+            def error_generator():
+                yield error_msg
+
+            response = MCPMessage(
+                sender=self.name,
+                receiver="CoordinatorAgent",
+                msg_type="LLM_RESPONSE_STREAM",
+                trace_id=message.trace_id,
+                payload={"response_stream": error_generator()}
+            )
+            self.message_bus.publish(response)
 
 class CoordinatorAgent:
     """Coordinator agent that orchestrates the entire workflow"""
@@ -351,7 +367,7 @@ class CoordinatorAgent:
 
         return f"Processing {len(files)} files: {', '.join([os.path.basename(fp) for fp in file_paths])}"
 
-    def handle_query(self, query: str, history: List):
+    def handle_query(self, query: str, history: List) -> Generator[str, None, None]:
         """Handle user query and return streaming response"""
         if not self.vector_store_ready:
             yield "Please upload and process documents first."
@@ -368,24 +384,24 @@ class CoordinatorAgent:
 
         # Wait for response and stream
         import time
-        timeout =
+        timeout = 20  # seconds
        start_time = time.time()
 
         while not self.current_response_stream and (time.time() - start_time) < timeout:
             time.sleep(0.1)
 
         if self.current_response_stream:
-            partial_response = ""
             try:
-
+                # Stream tokens directly
+                for chunk in self.current_response_stream:
+                    # The token is in chunk.choices[0].delta.content for chat_completion
+                    token = chunk.choices[0].delta.content
                     if token:
-
-                        yield partial_response
-                        time.sleep(0.05)  # Simulate streaming delay
+                        yield token
             except Exception as e:
-                yield f"Error
+                yield f"Error streaming response: {e}"
             finally:
-                self.current_response_stream = None
+                self.current_response_stream = None  # Reset for next query
         else:
             yield "Timeout: No response received from LLM agent."
 
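Note that the error path added in the LLMResponseAgent hunk yields plain strings, while the success path yields chat-completion chunk objects, so chunk.choices[0].delta.content would fail on the error stream. A hedged sketch of a consumer that tolerates both shapes (an illustration, not the code in app.py):

def iter_tokens(stream):
    """Yield text from either plain strings or chat_completion chunks."""
    for chunk in stream:
        if isinstance(chunk, str):
            yield chunk  # e.g. the error_generator() payload
            continue
        token = chunk.choices[0].delta.content
        if token:
            yield token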
@@ -478,14 +494,15 @@ def create_interface():
                     height=500,
                     elem_classes=["chat-container"],
                     show_copy_button=True,
-                    type="messages"
+                    type="messages"  # This requires the new data format
                 )
 
                 with gr.Row():
                     msg = gr.Textbox(
                         label="Ask a question about your documents...",
                         placeholder="What are the key findings in the uploaded documents?",
-                        scale=4
+                        scale=4,
+                        autofocus=True
                     )
                     submit_btn = gr.Button("Send 🚀", scale=1, variant="primary")
 
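With type="messages", gr.Chatbot renders OpenAI-style role/content dicts instead of [user, bot] pairs, so the history passed to it looks like (example values only):

history = [
    {"role": "user", "content": "What are the key findings?"},
    {"role": "assistant", "content": "The uploaded documents report ..."},
]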
@@ -506,13 +523,16 @@ def create_interface():
 
         def respond(message, history):
             if message.strip():
-                # Add user message to history
-                history.append(
-
+                # Add user message to history in the new format
+                history.append({"role": "user", "content": message})
+                # Add a placeholder for the assistant's response
+                history.append({"role": "assistant", "content": ""})
+
                 # Get streaming response
-                for
-
-
+                for token in coordinator_agent.handle_query(message, history):
+                    # Append each token to the assistant's message content
+                    history[-1]["content"] += token
+                    yield history, ""  # Yield updated history and clear the textbox
             else:
                 yield history, message
 
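The event wiring for respond() sits outside this hunk. As a self-contained sketch of how such a streaming generator is typically hooked up to the Textbox and Button (component names mirror the diff; the token list is a stand-in for coordinator_agent.handle_query):

import gradio as gr

def respond(message, history):
    if message.strip():
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": ""})
        for token in ["streamed ", "tokens ", "here"]:  # stand-in for handle_query()
            history[-1]["content"] += token
            yield history, ""  # update the chat, clear the textbox
    else:
        yield history, message

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(placeholder="Ask a question...")
    submit_btn = gr.Button("Send")
    msg.submit(respond, inputs=[msg, chatbot], outputs=[chatbot, msg])
    submit_btn.click(respond, inputs=[msg, chatbot], outputs=[chatbot, msg])

# demo.launch()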