Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -78,7 +78,7 @@ class BrowserAgent:
|
|
78 |
max_context_chars=2000 # Limit context size
|
79 |
)
|
80 |
|
81 |
-
|
82 |
"""Generate a detailed prompt section about available tools"""
|
83 |
try:
|
84 |
tools_prompt = "\n## 🛠️ AVAILABLE TOOLS\n"
|
@@ -124,7 +124,7 @@ Verify results before moving on
|
|
124 |
Keep Browser Open
|
125 |
Never close the session unless explicitly instructed
|
126 |
Avoid Redundancy
|
127 |
-
Don
|
128 |
## 🚨 SESSION PERSISTENCE RULES
|
129 |
- Browser stays open for the entire conversation
|
130 |
- Each action builds on previous state
|
@@ -135,7 +135,7 @@ Don’t repeat actions (e.g., clicking) when data is already collected
|
|
135 |
logger.error(f"Failed to generate tools prompt: {e}")
|
136 |
return "\n## 🛠️ TOOLS\nBrowser automation tools available but not detailed.\n"
|
137 |
|
138 |
-
|
139 |
base = """π Browser Agent β Persistent Session & Optimized Memory
|
140 |
You are an intelligent browser automation agent (Playwright via MCP) tasked with keeping a lightweight, ongoing session:
|
141 |
🎯 Mission
|
@@ -152,14 +152,14 @@ Snapshot β confirmation
|
|
152 |
Next plan (if needed)
|
153 |
💡 Best Practices
|
154 |
Use text selectors and wait for content
|
155 |
-
Pause 2
|
156 |
Be concise and focused on the current task it s important as soon as you have the information you came for return it
|
157 |
If earlier context is needed, ask the user to clarify.
|
158 |
"""
|
159 |
-
tools_section =
|
160 |
return base + tools_section
|
161 |
|
162 |
-
|
163 |
"""Initialize MCP client, model, session and agent"""
|
164 |
try:
|
165 |
logger.info("π Initializing Browser Agent...")
|
@@ -172,11 +172,14 @@ If earlier context is needed, ask the user to clarify.
|
|
172 |
self.model = ChatMistralAI(
|
173 |
model="devstral-small-latest",
|
174 |
api_key=mistral_key,
|
175 |
-
|
176 |
)
|
177 |
logger.info("✅ Mistral LLM initialized with optimized settings")
|
178 |
|
179 |
-
# MCP
|
|
|
|
|
|
|
|
|
180 |
self.client = MultiServerMCPClient({
|
181 |
"browser": {
|
182 |
"command": "npx",
|
@@ -186,13 +189,13 @@ If earlier context is needed, ask the user to clarify.
|
|
186 |
})
|
187 |
logger.info("✅ MCP client created")
|
188 |
|
189 |
-
# Start persistent session
|
190 |
self.session_context = self.client.session("browser")
|
191 |
-
self.session =
|
192 |
logger.info("✅ MCP session opened")
|
193 |
|
194 |
-
# Load tools
|
195 |
-
tools =
|
196 |
tools.append(SleepTool(description="Wait 2 seconds between two calls"))
|
197 |
logger.info(f"π₯ Loaded {len(tools)} tools")
|
198 |
self.available_tools = {t.name: t for t in tools}
|
@@ -201,13 +204,13 @@ If earlier context is needed, ask the user to clarify.
|
|
201 |
install_tool = self.available_tools.get("browser_install")
|
202 |
if install_tool:
|
203 |
try:
|
204 |
-
result =
|
205 |
logger.info(f"π₯ Browser install: {result}")
|
206 |
except Exception as e:
|
207 |
logger.warning(f"⚠️ Browser install failed: {e}, continuing.")
|
208 |
|
209 |
# System prompt
|
210 |
-
self.system_prompt =
|
211 |
|
212 |
# Create agent
|
213 |
prompt = ChatPromptTemplate.from_messages([
|
@@ -238,10 +241,10 @@ If earlier context is needed, ask the user to clarify.
|
|
238 |
|
239 |
except Exception as e:
|
240 |
logger.error(f"❌ Initialization failed: {e}")
|
241 |
-
|
242 |
raise
|
243 |
|
244 |
-
|
245 |
if not self.initialized:
|
246 |
return "❌ Agent not initialized. Please restart the application."
|
247 |
|
@@ -264,11 +267,12 @@ If earlier context is needed, ask the user to clarify.
|
|
264 |
optimized_pairs = len(optimized_history)
|
265 |
logger.info(f"π° Token optimization: {original_pairs} β {optimized_pairs} history pairs")
|
266 |
|
267 |
-
# Execute with optimized history
|
268 |
-
|
|
|
269 |
"input": enhanced_query,
|
270 |
"chat_history": history_messages
|
271 |
-
})
|
272 |
|
273 |
# Update session context with this interaction
|
274 |
self.conversation_manager.update_session_context(
|
@@ -282,17 +286,19 @@ If earlier context is needed, ask the user to clarify.
|
|
282 |
logger.error(f"Error processing query: {e}")
|
283 |
return f"❌ Error: {e}\n💡 Ask for a screenshot to diagnose."
|
284 |
|
285 |
-
|
286 |
"""Cleanup resources properly"""
|
287 |
try:
|
288 |
if self.session_context:
|
289 |
-
|
|
|
290 |
logger.info("✅ MCP session closed")
|
291 |
self.session_context = None
|
292 |
self.session = None
|
293 |
|
294 |
if self.client:
|
295 |
-
|
|
|
296 |
logger.info("✅ MCP client closed")
|
297 |
self.client = None
|
298 |
|
@@ -325,10 +331,9 @@ If earlier context is needed, ask the user to clarify.
|
|
325 |
|
326 |
# Global agent instance
|
327 |
agent: Optional[BrowserAgent] = None
|
328 |
-
event_loop: Optional[asyncio.AbstractEventLoop] = None
|
329 |
|
330 |
-
|
331 |
-
"""Initialize the agent
|
332 |
global agent
|
333 |
|
334 |
if not api_key.strip():
|
@@ -337,32 +342,32 @@ async def initialize_agent_async(api_key: str) -> str:
|
|
337 |
try:
|
338 |
# Cleanup existing agent
|
339 |
if agent:
|
340 |
-
|
341 |
|
342 |
# Create new agent
|
343 |
agent = BrowserAgent(api_key)
|
344 |
-
|
345 |
|
346 |
-
info =
|
347 |
return f"✅ Agent Initialized Successfully with Token Optimization!\n\n{info[:1000]}..."
|
348 |
|
349 |
except Exception as e:
|
350 |
logger.error(f"Initialization error: {e}")
|
351 |
return f"❌ Failed to initialize agent: {e}"
|
352 |
|
353 |
-
|
354 |
-
"""Process message
|
355 |
global agent
|
356 |
|
357 |
if not agent or not agent.initialized:
|
358 |
error_msg = "❌ Agent not initialized. Please initialize first with your API key."
|
359 |
history.append([message, error_msg])
|
360 |
-
return
|
361 |
|
362 |
if not message.strip():
|
363 |
error_msg = "Please enter a message"
|
364 |
history.append([message, error_msg])
|
365 |
-
return
|
366 |
|
367 |
try:
|
368 |
# Convert history format for the agent
|
@@ -372,7 +377,7 @@ async def process_message_async(message: str, history: List[List[str]]) -> Tuple
|
|
372 |
stats = agent.get_token_usage_stats(agent_history)
|
373 |
|
374 |
# Process the query with optimized history
|
375 |
-
response =
|
376 |
|
377 |
# Add token savings info to response if significant savings
|
378 |
if stats["savings_percentage"] > 50:
|
@@ -381,32 +386,15 @@ async def process_message_async(message: str, history: List[List[str]]) -> Tuple
|
|
381 |
# Add to history
|
382 |
history.append([message, response])
|
383 |
|
384 |
-
return
|
385 |
|
386 |
except Exception as e:
|
387 |
logger.error(f"Message processing error: {e}")
|
388 |
error_msg = f"❌ Error: {e}\n💡 Try asking for a screenshot to diagnose."
|
389 |
history.append([message, error_msg])
|
390 |
-
return
|
391 |
-
|
392 |
-
def run_in_event_loop(coro):
|
393 |
-
"""Run coroutine in the event loop"""
|
394 |
-
global event_loop
|
395 |
-
if event_loop and not event_loop.is_closed():
|
396 |
-
return asyncio.run_coroutine_threadsafe(coro, event_loop).result()
|
397 |
-
else:
|
398 |
-
return asyncio.run(coro)
|
399 |
-
|
400 |
-
# Sync wrappers for Gradio
|
401 |
-
def initialize_agent_sync(api_key: str) -> str:
|
402 |
-
"""Sync wrapper for agent initialization"""
|
403 |
-
return run_in_event_loop(initialize_agent_async(api_key))
|
404 |
-
|
405 |
-
def process_message_sync(message: str, history: List[List[str]]) -> Tuple[str, List[List[str]]]:
|
406 |
-
"""Sync wrapper for message processing"""
|
407 |
-
return run_in_event_loop(process_message_async(message, history))
|
408 |
|
409 |
-
def
|
410 |
"""Get token usage statistics"""
|
411 |
global agent
|
412 |
if not agent or not agent.initialized:
|
@@ -424,6 +412,10 @@ def get_token_stats_sync(history: List[List[str]]) -> str:
|
|
424 |
• Estimated tokens saved: {stats['estimated_tokens_saved']:,}
|
425 |
• Savings percentage: {stats['savings_percentage']:.1f}%"""
|
426 |
|
|
|
|
|
|
|
|
|
427 |
def create_interface():
|
428 |
"""Create the Gradio interface"""
|
429 |
|
@@ -509,21 +501,27 @@ def create_interface():
|
|
509 |
|
510 |
# Event handlers
|
511 |
init_button.click(
|
512 |
-
fn=
|
513 |
inputs=[api_key_input],
|
514 |
outputs=[status_output]
|
515 |
)
|
516 |
|
517 |
send_button.click(
|
518 |
-
fn=
|
519 |
inputs=[message_input, chatbot],
|
520 |
-
outputs=[
|
|
|
|
|
|
|
521 |
)
|
522 |
|
523 |
message_input.submit(
|
524 |
-
fn=
|
525 |
inputs=[message_input, chatbot],
|
526 |
-
outputs=[
|
|
|
|
|
|
|
527 |
)
|
528 |
|
529 |
clear_button.click(
|
@@ -532,13 +530,13 @@ def create_interface():
|
|
532 |
)
|
533 |
|
534 |
screenshot_button.click(
|
535 |
-
fn=
|
536 |
inputs=[chatbot],
|
537 |
-
outputs=[
|
538 |
)
|
539 |
|
540 |
token_stats_button.click(
|
541 |
-
fn=
|
542 |
inputs=[chatbot],
|
543 |
outputs=[token_stats_output]
|
544 |
)
|
@@ -584,32 +582,26 @@ def create_interface():
|
|
584 |
|
585 |
return interface
|
586 |
|
587 |
-
|
588 |
"""Cleanup agent resources"""
|
589 |
global agent
|
590 |
if agent:
|
591 |
-
|
592 |
logger.info("🧹 Agent cleaned up")
|
593 |
|
594 |
def signal_handler(signum, frame):
|
595 |
"""Handle shutdown signals"""
|
596 |
logger.info(f"π‘ Received signal {signum}, cleaning up...")
|
597 |
-
|
598 |
-
if event_loop and not event_loop.is_closed():
|
599 |
-
event_loop.create_task(cleanup_agent())
|
600 |
sys.exit(0)
|
601 |
|
602 |
-
|
603 |
-
"""Main
|
604 |
-
global event_loop
|
605 |
|
606 |
# Set up signal handlers
|
607 |
signal.signal(signal.SIGINT, signal_handler)
|
608 |
signal.signal(signal.SIGTERM, signal_handler)
|
609 |
|
610 |
-
# Get the current event loop
|
611 |
-
event_loop = asyncio.get_event_loop()
|
612 |
-
|
613 |
try:
|
614 |
logger.info("π Starting MCP Browser Agent Application with Token Optimization...")
|
615 |
|
@@ -627,11 +619,11 @@ async def main():
|
|
627 |
except Exception as e:
|
628 |
logger.error(f"Application error: {e}")
|
629 |
finally:
|
630 |
-
|
631 |
|
632 |
if __name__ == "__main__":
|
633 |
try:
|
634 |
-
|
635 |
except KeyboardInterrupt:
|
636 |
logger.info("π Application stopped by user")
|
637 |
except Exception as e:
|
|
|
78 |
max_context_chars=2000 # Limit context size
|
79 |
)
|
80 |
|
81 |
+
def generate_tools_prompt(self):
|
82 |
"""Generate a detailed prompt section about available tools"""
|
83 |
try:
|
84 |
tools_prompt = "\n## 🛠️ AVAILABLE TOOLS\n"
|
|
|
124 |
Keep Browser Open
|
125 |
Never close the session unless explicitly instructed
|
126 |
Avoid Redundancy
|
127 |
+
Don't repeat actions (e.g., clicking) when data is already collected
|
128 |
## 🚨 SESSION PERSISTENCE RULES
|
129 |
- Browser stays open for the entire conversation
|
130 |
- Each action builds on previous state
|
|
|
135 |
logger.error(f"Failed to generate tools prompt: {e}")
|
136 |
return "\n## 🛠️ TOOLS\nBrowser automation tools available but not detailed.\n"
|
137 |
|
138 |
+
def get_system_prompt_with_tools(self):
|
139 |
base = """π Browser Agent β Persistent Session & Optimized Memory
|
140 |
You are an intelligent browser automation agent (Playwright via MCP) tasked with keeping a lightweight, ongoing session:
|
141 |
🎯 Mission
|
|
|
152 |
Next plan (if needed)
|
153 |
💡 Best Practices
|
154 |
Use text selectors and wait for content
|
155 |
+
Pause 2 s between tool calls
|
156 |
Be concise and focused on the current task it s important as soon as you have the information you came for return it
|
157 |
If earlier context is needed, ask the user to clarify.
|
158 |
"""
|
159 |
+
tools_section = self.generate_tools_prompt()
|
160 |
return base + tools_section
|
161 |
|
162 |
+
def initialize(self):
|
163 |
"""Initialize MCP client, model, session and agent"""
|
164 |
try:
|
165 |
logger.info("π Initializing Browser Agent...")
|
|
|
172 |
self.model = ChatMistralAI(
|
173 |
model="devstral-small-latest",
|
174 |
api_key=mistral_key,
|
|
|
175 |
)
|
176 |
logger.info("✅ Mistral LLM initialized with optimized settings")
|
177 |
|
178 |
+
# Create event loop for MCP operations
|
179 |
+
loop = asyncio.new_event_loop()
|
180 |
+
asyncio.set_event_loop(loop)
|
181 |
+
|
182 |
+
# MCP client setup (async operations in sync wrapper)
|
183 |
self.client = MultiServerMCPClient({
|
184 |
"browser": {
|
185 |
"command": "npx",
|
|
|
189 |
})
|
190 |
logger.info("✅ MCP client created")
|
191 |
|
192 |
+
# Start persistent session (run async operation in sync context)
|
193 |
self.session_context = self.client.session("browser")
|
194 |
+
self.session = loop.run_until_complete(self.session_context.__aenter__())
|
195 |
logger.info("✅ MCP session opened")
|
196 |
|
197 |
+
# Load tools (async operation)
|
198 |
+
tools = loop.run_until_complete(load_mcp_tools(self.session))
|
199 |
tools.append(SleepTool(description="Wait 2 seconds between two calls"))
|
200 |
logger.info(f"π₯ Loaded {len(tools)} tools")
|
201 |
self.available_tools = {t.name: t for t in tools}
|
|
|
204 |
install_tool = self.available_tools.get("browser_install")
|
205 |
if install_tool:
|
206 |
try:
|
207 |
+
result = loop.run_until_complete(install_tool.arun({}))
|
208 |
logger.info(f"π₯ Browser install: {result}")
|
209 |
except Exception as e:
|
210 |
logger.warning(f"⚠️ Browser install failed: {e}, continuing.")
|
211 |
|
212 |
# System prompt
|
213 |
+
self.system_prompt = self.get_system_prompt_with_tools()
|
214 |
|
215 |
# Create agent
|
216 |
prompt = ChatPromptTemplate.from_messages([
|
|
|
241 |
|
242 |
except Exception as e:
|
243 |
logger.error(f"❌ Initialization failed: {e}")
|
244 |
+
self.cleanup()
|
245 |
raise
|
246 |
|
247 |
+
def process_query(self, query: str, chat_history: List[Tuple[str, str]]) -> str:
|
248 |
if not self.initialized:
|
249 |
return "❌ Agent not initialized. Please restart the application."
|
250 |
|
|
|
267 |
optimized_pairs = len(optimized_history)
|
268 |
logger.info(f"π° Token optimization: {original_pairs} β {optimized_pairs} history pairs")
|
269 |
|
270 |
+
# Execute with optimized history (run async operation in sync context)
|
271 |
+
loop = asyncio.get_event_loop()
|
272 |
+
resp = loop.run_until_complete(self.agent_executor.ainvoke({
|
273 |
"input": enhanced_query,
|
274 |
"chat_history": history_messages
|
275 |
+
}))
|
276 |
|
277 |
# Update session context with this interaction
|
278 |
self.conversation_manager.update_session_context(
|
|
|
286 |
logger.error(f"Error processing query: {e}")
|
287 |
return f"❌ Error: {e}\n💡 Ask for a screenshot to diagnose."
|
288 |
|
289 |
+
def cleanup(self):
|
290 |
"""Cleanup resources properly"""
|
291 |
try:
|
292 |
if self.session_context:
|
293 |
+
loop = asyncio.get_event_loop()
|
294 |
+
loop.run_until_complete(self.session_context.__aexit__(None, None, None))
|
295 |
logger.info("✅ MCP session closed")
|
296 |
self.session_context = None
|
297 |
self.session = None
|
298 |
|
299 |
if self.client:
|
300 |
+
loop = asyncio.get_event_loop()
|
301 |
+
loop.run_until_complete(self.client.close())
|
302 |
logger.info("✅ MCP client closed")
|
303 |
self.client = None
|
304 |
|
|
|
331 |
|
332 |
# Global agent instance
|
333 |
agent: Optional[BrowserAgent] = None
|
|
|
334 |
|
335 |
+
def initialize_agent(api_key: str) -> str:
|
336 |
+
"""Initialize the agent"""
|
337 |
global agent
|
338 |
|
339 |
if not api_key.strip():
|
|
|
342 |
try:
|
343 |
# Cleanup existing agent
|
344 |
if agent:
|
345 |
+
agent.cleanup()
|
346 |
|
347 |
# Create new agent
|
348 |
agent = BrowserAgent(api_key)
|
349 |
+
agent.initialize()
|
350 |
|
351 |
+
info = agent.get_system_prompt_with_tools()
|
352 |
return f"✅ Agent Initialized Successfully with Token Optimization!\n\n{info[:1000]}..."
|
353 |
|
354 |
except Exception as e:
|
355 |
logger.error(f"Initialization error: {e}")
|
356 |
return f"❌ Failed to initialize agent: {e}"
|
357 |
|
358 |
+
def process_message(message: str, history: List[List[str]]) -> List[List[str]]:
|
359 |
+
"""Process message and return updated history"""
|
360 |
global agent
|
361 |
|
362 |
if not agent or not agent.initialized:
|
363 |
error_msg = "❌ Agent not initialized. Please initialize first with your API key."
|
364 |
history.append([message, error_msg])
|
365 |
+
return history
|
366 |
|
367 |
if not message.strip():
|
368 |
error_msg = "Please enter a message"
|
369 |
history.append([message, error_msg])
|
370 |
+
return history
|
371 |
|
372 |
try:
|
373 |
# Convert history format for the agent
|
|
|
377 |
stats = agent.get_token_usage_stats(agent_history)
|
378 |
|
379 |
# Process the query with optimized history
|
380 |
+
response = agent.process_query(message, agent_history)
|
381 |
|
382 |
# Add token savings info to response if significant savings
|
383 |
if stats["savings_percentage"] > 50:
|
|
|
386 |
# Add to history
|
387 |
history.append([message, response])
|
388 |
|
389 |
+
return history
|
390 |
|
391 |
except Exception as e:
|
392 |
logger.error(f"Message processing error: {e}")
|
393 |
error_msg = f"❌ Error: {e}\n💡 Try asking for a screenshot to diagnose."
|
394 |
history.append([message, error_msg])
|
395 |
+
return history
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
396 |
|
397 |
+
def get_token_stats(history: List[List[str]]) -> str:
|
398 |
"""Get token usage statistics"""
|
399 |
global agent
|
400 |
if not agent or not agent.initialized:
|
|
|
412 |
• Estimated tokens saved: {stats['estimated_tokens_saved']:,}
|
413 |
• Savings percentage: {stats['savings_percentage']:.1f}%"""
|
414 |
|
415 |
+
def screenshot_quick(history: List[List[str]]) -> List[List[str]]:
|
416 |
+
"""Quick screenshot function"""
|
417 |
+
return process_message("Take a screenshot of the current page", history)
|
418 |
+
|
419 |
def create_interface():
|
420 |
"""Create the Gradio interface"""
|
421 |
|
|
|
501 |
|
502 |
# Event handlers
|
503 |
init_button.click(
|
504 |
+
fn=initialize_agent,
|
505 |
inputs=[api_key_input],
|
506 |
outputs=[status_output]
|
507 |
)
|
508 |
|
509 |
send_button.click(
|
510 |
+
fn=process_message,
|
511 |
inputs=[message_input, chatbot],
|
512 |
+
outputs=[chatbot]
|
513 |
+
).then(
|
514 |
+
fn=lambda: "",
|
515 |
+
outputs=[message_input]
|
516 |
)
|
517 |
|
518 |
message_input.submit(
|
519 |
+
fn=process_message,
|
520 |
inputs=[message_input, chatbot],
|
521 |
+
outputs=[chatbot]
|
522 |
+
).then(
|
523 |
+
fn=lambda: "",
|
524 |
+
outputs=[message_input]
|
525 |
)
|
526 |
|
527 |
clear_button.click(
|
|
|
530 |
)
|
531 |
|
532 |
screenshot_button.click(
|
533 |
+
fn=screenshot_quick,
|
534 |
inputs=[chatbot],
|
535 |
+
outputs=[chatbot]
|
536 |
)
|
537 |
|
538 |
token_stats_button.click(
|
539 |
+
fn=get_token_stats,
|
540 |
inputs=[chatbot],
|
541 |
outputs=[token_stats_output]
|
542 |
)
|
|
|
582 |
|
583 |
return interface
|
584 |
|
585 |
+
def cleanup_agent():
|
586 |
"""Cleanup agent resources"""
|
587 |
global agent
|
588 |
if agent:
|
589 |
+
agent.cleanup()
|
590 |
logger.info("🧹 Agent cleaned up")
|
591 |
|
592 |
def signal_handler(signum, frame):
|
593 |
"""Handle shutdown signals"""
|
594 |
logger.info(f"π‘ Received signal {signum}, cleaning up...")
|
595 |
+
cleanup_agent()
|
|
|
|
|
596 |
sys.exit(0)
|
597 |
|
598 |
+
def main():
|
599 |
+
"""Main function to run everything"""
|
|
|
600 |
|
601 |
# Set up signal handlers
|
602 |
signal.signal(signal.SIGINT, signal_handler)
|
603 |
signal.signal(signal.SIGTERM, signal_handler)
|
604 |
|
|
|
|
|
|
|
605 |
try:
|
606 |
logger.info("π Starting MCP Browser Agent Application with Token Optimization...")
|
607 |
|
|
|
619 |
except Exception as e:
|
620 |
logger.error(f"Application error: {e}")
|
621 |
finally:
|
622 |
+
cleanup_agent()
|
623 |
|
624 |
if __name__ == "__main__":
|
625 |
try:
|
626 |
+
main()
|
627 |
except KeyboardInterrupt:
|
628 |
logger.info("π Application stopped by user")
|
629 |
except Exception as e:
|