Update app.py
app.py CHANGED
@@ -38,7 +38,7 @@ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-
 
 class MCPMessage:
     """Model Context Protocol Message Structure"""
-    def __init__(self, sender: str, receiver: str, msg_type: str,
+    def __init__(self, sender: str, receiver: str, msg_type: str,
                  trace_id: str = None, payload: Dict = None):
         self.sender = sender
         self.receiver = receiver
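As an illustration of the message shape, here is a hypothetical MCPMessage construction; the field values are modeled on the LLM_RESPONSE_STREAM message added later in this diff, and the module-level message_bus object is an assumption about app.py's internals:

# Hypothetical usage of MCPMessage; field names come from the diff, while the
# publish() call mirrors the self.message_bus.publish(...) pattern used below.
msg = MCPMessage(
    sender="RetrievalAgent",
    receiver="LLMResponseAgent",
    msg_type="RETRIEVAL_RESULT",
    trace_id="trace-001",
    payload={"query": "What are the key findings?", "retrieved_context": []},
)
message_bus.publish(msg)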
@@ -183,7 +183,7 @@ class IngestionAgent:
                 # Split text into chunks
                 chunks = self.text_splitter.split_text(text)
                 docs = [Document(page_content=chunk, metadata={"source": file_path})
-
+                        for chunk in chunks]
                 processed_docs.extend(docs)
 
                 # Send processed documents to RetrievalAgent
@@ -242,7 +242,7 @@ class RetrievalAgent:
         try:
             docs = self.vector_store.similarity_search(query, k=k)
             context = [{"content": doc.page_content, "source": doc.metadata.get("source", "")}
-
+                       for doc in docs]
 
             response = MCPMessage(
                 sender=self.name,
@@ -277,24 +277,27 @@ class LLMResponseAgent:
         query = message.payload.get("query", "")
         context = message.payload.get("retrieved_context", [])
 
-        # Build
+        # Build context string
         context_text = "\n\n".join([f"Source: {ctx['source']}\nContent: {ctx['content']}"
-
+                                    for ctx in context])
 
-
-
-
-
-
-
-
-
+        # Create messages for conversational format
+        messages = [
+            {
+                "role": "system",
+                "content": "You are a helpful assistant. Based on the provided context below, answer the user's question accurately and comprehensively. Cite the sources if possible.",
+            },
+            {
+                "role": "user",
+                "content": f"Context:\n\n{context_text}\n\nQuestion: {query}"
+            }
+        ]
 
         try:
-            #
-            response_stream = client.
-
-
+            # Use client.chat_completion for conversational models
+            response_stream = client.chat_completion(
+                messages=messages,
+                max_tokens=512,
                 temperature=0.7,
                 stream=True
             )
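As a standalone illustration of the new call, a minimal sketch of streaming with huggingface_hub's InferenceClient.chat_completion; the model id below is a placeholder, and app.py constructs its own client outside this hunk:

from huggingface_hub import InferenceClient

# Placeholder model id; not the model configured in app.py.
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Context:\n\n...\n\nQuestion: ..."},
]

# With stream=True the call returns an iterator of chunks; the incremental
# text is in chunk.choices[0].delta.content (it can be None on some chunks).
for chunk in client.chat_completion(messages=messages, max_tokens=512,
                                    temperature=0.7, stream=True):
    token = chunk.choices[0].delta.content
    if token:
        print(token, end="", flush=True)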
@@ -315,6 +318,19 @@ Answer:"""
 
         except Exception as e:
             logger.error(f"Error generating response: {e}")
+            # Send an error stream back
+            error_msg = f"Error from LLM: {e}"
+            def error_generator():
+                yield error_msg
+
+            response = MCPMessage(
+                sender=self.name,
+                receiver="CoordinatorAgent",
+                msg_type="LLM_RESPONSE_STREAM",
+                trace_id=message.trace_id,
+                payload={"response_stream": error_generator()}
+            )
+            self.message_bus.publish(response)
 
 class CoordinatorAgent:
     """Coordinator agent that orchestrates the entire workflow"""
@@ -351,7 +367,7 @@ class CoordinatorAgent:
 
         return f"Processing {len(files)} files: {', '.join([os.path.basename(fp) for fp in file_paths])}"
 
-    def handle_query(self, query: str, history: List):
+    def handle_query(self, query: str, history: List) -> Generator[str, None, None]:
         """Handle user query and return streaming response"""
         if not self.vector_store_ready:
             yield "Please upload and process documents first."
@@ -368,24 +384,24 @@ class CoordinatorAgent:
 
         # Wait for response and stream
         import time
-        timeout =
+        timeout = 20  # seconds
        start_time = time.time()
 
         while not self.current_response_stream and (time.time() - start_time) < timeout:
             time.sleep(0.1)
 
         if self.current_response_stream:
-            partial_response = ""
             try:
-
+                # Stream tokens directly
+                for chunk in self.current_response_stream:
+                    # The token is in chunk.choices[0].delta.content for chat_completion
+                    token = chunk.choices[0].delta.content
                     if token:
-
-                        yield partial_response
-                        time.sleep(0.05)  # Simulate streaming delay
+                        yield token
             except Exception as e:
-                yield f"Error
+                yield f"Error streaming response: {e}"
             finally:
-                self.current_response_stream = None
+                self.current_response_stream = None  # Reset for next query
         else:
             yield "Timeout: No response received from LLM agent."
 
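Note that the error path added in the LLMResponseAgent hunk yields plain strings, while the success path yields chat-completion chunk objects, so chunk.choices[0].delta.content would fail on the error stream. A hedged sketch of a consumer that tolerates both shapes (an illustration, not the code in app.py):

def iter_tokens(stream):
    """Yield text from either plain strings or chat_completion chunks."""
    for chunk in stream:
        if isinstance(chunk, str):
            yield chunk  # e.g. the error_generator() payload
            continue
        token = chunk.choices[0].delta.content
        if token:
            yield token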
@@ -478,14 +494,15 @@ def create_interface():
                     height=500,
                     elem_classes=["chat-container"],
                     show_copy_button=True,
-                    type="messages"
+                    type="messages"  # This requires the new data format
                 )
 
                 with gr.Row():
                     msg = gr.Textbox(
                         label="Ask a question about your documents...",
                         placeholder="What are the key findings in the uploaded documents?",
-                        scale=4
+                        scale=4,
+                        autofocus=True
                     )
                     submit_btn = gr.Button("Send 🚀", scale=1, variant="primary")
 
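With type="messages", gr.Chatbot renders OpenAI-style role/content dicts instead of [user, bot] pairs, so the history passed to it looks like (example values only):

history = [
    {"role": "user", "content": "What are the key findings?"},
    {"role": "assistant", "content": "The uploaded documents report ..."},
]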
@@ -506,13 +523,16 @@ def create_interface():
 
         def respond(message, history):
             if message.strip():
-                # Add user message to history
-                history.append(
-
+                # Add user message to history in the new format
+                history.append({"role": "user", "content": message})
+                # Add a placeholder for the assistant's response
+                history.append({"role": "assistant", "content": ""})
+
                 # Get streaming response
-                for
-
-
+                for token in coordinator_agent.handle_query(message, history):
+                    # Append each token to the assistant's message content
+                    history[-1]["content"] += token
+                    yield history, ""  # Yield updated history and clear the textbox
             else:
                 yield history, message
 
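The event wiring for respond() sits outside this hunk. As a self-contained sketch of how such a streaming generator is typically hooked up to the Textbox and Button (component names mirror the diff; the token list is a stand-in for coordinator_agent.handle_query):

import gradio as gr

def respond(message, history):
    if message.strip():
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": ""})
        for token in ["streamed ", "tokens ", "here"]:  # stand-in for handle_query()
            history[-1]["content"] += token
            yield history, ""  # update the chat, clear the textbox
    else:
        yield history, message

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(placeholder="Ask a question...")
    submit_btn = gr.Button("Send")
    msg.submit(respond, inputs=[msg, chatbot], outputs=[chatbot, msg])
    submit_btn.click(respond, inputs=[msg, chatbot], outputs=[chatbot, msg])

# demo.launch()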