Zelyanoth committed on
Commit
98dfc1a
·
verified ·
1 Parent(s): 102f020

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -74
app.py CHANGED
@@ -78,7 +78,7 @@ class BrowserAgent:
78
  max_context_chars=2000 # Limit context size
79
  )
80
 
81
- async def generate_tools_prompt(self):
82
  """Generate a detailed prompt section about available tools"""
83
  try:
84
  tools_prompt = "\n## 🛠️ AVAILABLE TOOLS\n"
@@ -124,7 +124,7 @@ Verify results before moving on
124
  Keep Browser Open
125
  Never close the session unless explicitly instructed
126
  Avoid Redundancy
127
- Don’t repeat actions (e.g., clicking) when data is already collected
128
  ## 🚨 SESSION PERSISTENCE RULES
129
  - Browser stays open for the entire conversation
130
  - Each action builds on previous state
@@ -135,7 +135,7 @@ Don’t repeat actions (e.g., clicking) when data is already collected
135
  logger.error(f"Failed to generate tools prompt: {e}")
136
  return "\n## 🛠️ TOOLS\nBrowser automation tools available but not detailed.\n"
137
 
138
- async def get_system_prompt_with_tools(self):
139
  base = """🌐 Browser Agent — Persistent Session & Optimized Memory
140
  You are an intelligent browser automation agent (Playwright via MCP) tasked with keeping a lightweight, ongoing session:
141
  🎯 Mission
@@ -152,14 +152,14 @@ Snapshot → confirmation
152
  Next plan (if needed)
153
  💡 Best Practices
154
  Use text selectors and wait for content
155
- Pause 2 s between tool calls
156
  Be concise and focused on the current task it s important as soon as you have the information you came for return it
157
  If earlier context is needed, ask the user to clarify.
158
  """
159
- tools_section = await self.generate_tools_prompt()
160
  return base + tools_section
161
 
162
- async def initialize(self):
163
  """Initialize MCP client, model, session and agent"""
164
  try:
165
  logger.info("🚀 Initializing Browser Agent...")
@@ -172,11 +172,14 @@ If earlier context is needed, ask the user to clarify.
172
  self.model = ChatMistralAI(
173
  model="devstral-small-latest",
174
  api_key=mistral_key,
175
-
176
  )
177
  logger.info("✅ Mistral LLM initialized with optimized settings")
178
 
179
- # MCP client
 
 
 
 
180
  self.client = MultiServerMCPClient({
181
  "browser": {
182
  "command": "npx",
@@ -186,13 +189,13 @@ If earlier context is needed, ask the user to clarify.
186
  })
187
  logger.info("✅ MCP client created")
188
 
189
- # Start persistent session
190
  self.session_context = self.client.session("browser")
191
- self.session = await self.session_context.__aenter__()
192
  logger.info("✅ MCP session opened")
193
 
194
- # Load tools
195
- tools = await load_mcp_tools(self.session)
196
  tools.append(SleepTool(description="Wait 2 seconds between two calls"))
197
  logger.info(f"📥 Loaded {len(tools)} tools")
198
  self.available_tools = {t.name: t for t in tools}
@@ -201,13 +204,13 @@ If earlier context is needed, ask the user to clarify.
201
  install_tool = self.available_tools.get("browser_install")
202
  if install_tool:
203
  try:
204
- result = await install_tool.arun({})
205
  logger.info(f"📥 Browser install: {result}")
206
  except Exception as e:
207
  logger.warning(f"⚠️ Browser install failed: {e}, continuing.")
208
 
209
  # System prompt
210
- self.system_prompt = await self.get_system_prompt_with_tools()
211
 
212
  # Create agent
213
  prompt = ChatPromptTemplate.from_messages([
@@ -238,10 +241,10 @@ If earlier context is needed, ask the user to clarify.
238
 
239
  except Exception as e:
240
  logger.error(f"❌ Initialization failed: {e}")
241
- await self.cleanup()
242
  raise
243
 
244
- async def process_query(self, query: str, chat_history: List[Tuple[str, str]]) -> str:
245
  if not self.initialized:
246
  return "❌ Agent not initialized. Please restart the application."
247
 
@@ -264,11 +267,12 @@ If earlier context is needed, ask the user to clarify.
264
  optimized_pairs = len(optimized_history)
265
  logger.info(f"💰 Token optimization: {original_pairs} → {optimized_pairs} history pairs")
266
 
267
- # Execute with optimized history
268
- resp = await self.agent_executor.ainvoke({
 
269
  "input": enhanced_query,
270
  "chat_history": history_messages
271
- })
272
 
273
  # Update session context with this interaction
274
  self.conversation_manager.update_session_context(
@@ -282,17 +286,19 @@ If earlier context is needed, ask the user to clarify.
282
  logger.error(f"Error processing query: {e}")
283
  return f"❌ Error: {e}\n💡 Ask for a screenshot to diagnose."
284
 
285
- async def cleanup(self):
286
  """Cleanup resources properly"""
287
  try:
288
  if self.session_context:
289
- await self.session_context.__aexit__(None, None, None)
 
290
  logger.info("✅ MCP session closed")
291
  self.session_context = None
292
  self.session = None
293
 
294
  if self.client:
295
- await self.client.close()
 
296
  logger.info("✅ MCP client closed")
297
  self.client = None
298
 
@@ -325,10 +331,9 @@ If earlier context is needed, ask the user to clarify.
325
 
326
  # Global agent instance
327
  agent: Optional[BrowserAgent] = None
328
- event_loop: Optional[asyncio.AbstractEventLoop] = None
329
 
330
- async def initialize_agent_async(api_key: str) -> str:
331
- """Initialize the agent asynchronously"""
332
  global agent
333
 
334
  if not api_key.strip():
@@ -337,32 +342,32 @@ async def initialize_agent_async(api_key: str) -> str:
337
  try:
338
  # Cleanup existing agent
339
  if agent:
340
- await agent.cleanup()
341
 
342
  # Create new agent
343
  agent = BrowserAgent(api_key)
344
- await agent.initialize()
345
 
346
- info = await agent.get_system_prompt_with_tools()
347
  return f"✅ Agent Initialized Successfully with Token Optimization!\n\n{info[:1000]}..."
348
 
349
  except Exception as e:
350
  logger.error(f"Initialization error: {e}")
351
  return f"❌ Failed to initialize agent: {e}"
352
 
353
- async def process_message_async(message: str, history: List[List[str]]) -> Tuple[str, List[List[str]]]:
354
- """Process message asynchronously with token optimization"""
355
  global agent
356
 
357
  if not agent or not agent.initialized:
358
  error_msg = "❌ Agent not initialized. Please initialize first with your API key."
359
  history.append([message, error_msg])
360
- return "", history
361
 
362
  if not message.strip():
363
  error_msg = "Please enter a message"
364
  history.append([message, error_msg])
365
- return "", history
366
 
367
  try:
368
  # Convert history format for the agent
@@ -372,7 +377,7 @@ async def process_message_async(message: str, history: List[List[str]]) -> Tuple
372
  stats = agent.get_token_usage_stats(agent_history)
373
 
374
  # Process the query with optimized history
375
- response = await agent.process_query(message, agent_history)
376
 
377
  # Add token savings info to response if significant savings
378
  if stats["savings_percentage"] > 50:
@@ -381,32 +386,15 @@ async def process_message_async(message: str, history: List[List[str]]) -> Tuple
381
  # Add to history
382
  history.append([message, response])
383
 
384
- return "", history
385
 
386
  except Exception as e:
387
  logger.error(f"Message processing error: {e}")
388
  error_msg = f"❌ Error: {e}\n💡 Try asking for a screenshot to diagnose."
389
  history.append([message, error_msg])
390
- return "", history
391
-
392
- def run_in_event_loop(coro):
393
- """Run coroutine in the event loop"""
394
- global event_loop
395
- if event_loop and not event_loop.is_closed():
396
- return asyncio.run_coroutine_threadsafe(coro, event_loop).result()
397
- else:
398
- return asyncio.run(coro)
399
-
400
- # Sync wrappers for Gradio
401
- def initialize_agent_sync(api_key: str) -> str:
402
- """Sync wrapper for agent initialization"""
403
- return run_in_event_loop(initialize_agent_async(api_key))
404
-
405
- def process_message_sync(message: str, history: List[List[str]]) -> Tuple[str, List[List[str]]]:
406
- """Sync wrapper for message processing"""
407
- return run_in_event_loop(process_message_async(message, history))
408
 
409
- def get_token_stats_sync(history: List[List[str]]) -> str:
410
  """Get token usage statistics"""
411
  global agent
412
  if not agent or not agent.initialized:
@@ -424,6 +412,10 @@ def get_token_stats_sync(history: List[List[str]]) -> str:
424
  • Estimated tokens saved: {stats['estimated_tokens_saved']:,}
425
  • Savings percentage: {stats['savings_percentage']:.1f}%"""
426
 
 
 
 
 
427
  def create_interface():
428
  """Create the Gradio interface"""
429
 
@@ -509,21 +501,27 @@ def create_interface():
509
 
510
  # Event handlers
511
  init_button.click(
512
- fn=initialize_agent_sync,
513
  inputs=[api_key_input],
514
  outputs=[status_output]
515
  )
516
 
517
  send_button.click(
518
- fn=process_message_sync,
519
  inputs=[message_input, chatbot],
520
- outputs=[message_input, chatbot]
 
 
 
521
  )
522
 
523
  message_input.submit(
524
- fn=process_message_sync,
525
  inputs=[message_input, chatbot],
526
- outputs=[message_input, chatbot]
 
 
 
527
  )
528
 
529
  clear_button.click(
@@ -532,13 +530,13 @@ def create_interface():
532
  )
533
 
534
  screenshot_button.click(
535
- fn=lambda history: process_message_sync("Take a screenshot of the current page", history),
536
  inputs=[chatbot],
537
- outputs=[message_input, chatbot]
538
  )
539
 
540
  token_stats_button.click(
541
- fn=get_token_stats_sync,
542
  inputs=[chatbot],
543
  outputs=[token_stats_output]
544
  )
@@ -584,32 +582,26 @@ def create_interface():
584
 
585
  return interface
586
 
587
- async def cleanup_agent():
588
  """Cleanup agent resources"""
589
  global agent
590
  if agent:
591
- await agent.cleanup()
592
  logger.info("🧹 Agent cleaned up")
593
 
594
  def signal_handler(signum, frame):
595
  """Handle shutdown signals"""
596
  logger.info(f"📡 Received signal {signum}, cleaning up...")
597
- global event_loop
598
- if event_loop and not event_loop.is_closed():
599
- event_loop.create_task(cleanup_agent())
600
  sys.exit(0)
601
 
602
- async def main():
603
- """Main async function to run everything"""
604
- global event_loop
605
 
606
  # Set up signal handlers
607
  signal.signal(signal.SIGINT, signal_handler)
608
  signal.signal(signal.SIGTERM, signal_handler)
609
 
610
- # Get the current event loop
611
- event_loop = asyncio.get_event_loop()
612
-
613
  try:
614
  logger.info("🚀 Starting MCP Browser Agent Application with Token Optimization...")
615
 
@@ -627,11 +619,11 @@ async def main():
627
  except Exception as e:
628
  logger.error(f"Application error: {e}")
629
  finally:
630
- await cleanup_agent()
631
 
632
  if __name__ == "__main__":
633
  try:
634
- asyncio.run(main())
635
  except KeyboardInterrupt:
636
  logger.info("🛑 Application stopped by user")
637
  except Exception as e:
 
78
  max_context_chars=2000 # Limit context size
79
  )
80
 
81
+ def generate_tools_prompt(self):
82
  """Generate a detailed prompt section about available tools"""
83
  try:
84
  tools_prompt = "\n## 🛠️ AVAILABLE TOOLS\n"
 
124
  Keep Browser Open
125
  Never close the session unless explicitly instructed
126
  Avoid Redundancy
127
+ Don't repeat actions (e.g., clicking) when data is already collected
128
  ## 🚨 SESSION PERSISTENCE RULES
129
  - Browser stays open for the entire conversation
130
  - Each action builds on previous state
 
135
  logger.error(f"Failed to generate tools prompt: {e}")
136
  return "\n## 🛠️ TOOLS\nBrowser automation tools available but not detailed.\n"
137
 
138
+ def get_system_prompt_with_tools(self):
139
  base = """🌐 Browser Agent — Persistent Session & Optimized Memory
140
  You are an intelligent browser automation agent (Playwright via MCP) tasked with keeping a lightweight, ongoing session:
141
  🎯 Mission
 
152
  Next plan (if needed)
153
  💡 Best Practices
154
  Use text selectors and wait for content
155
+ Pause 2 s between tool calls
156
  Be concise and focused on the current task it s important as soon as you have the information you came for return it
157
  If earlier context is needed, ask the user to clarify.
158
  """
159
+ tools_section = self.generate_tools_prompt()
160
  return base + tools_section
161
 
162
+ def initialize(self):
163
  """Initialize MCP client, model, session and agent"""
164
  try:
165
  logger.info("🚀 Initializing Browser Agent...")
 
172
  self.model = ChatMistralAI(
173
  model="devstral-small-latest",
174
  api_key=mistral_key,
 
175
  )
176
  logger.info("✅ Mistral LLM initialized with optimized settings")
177
 
178
+ # Create event loop for MCP operations
179
+ loop = asyncio.new_event_loop()
180
+ asyncio.set_event_loop(loop)
181
+
182
+ # MCP client setup (async operations in sync wrapper)
183
  self.client = MultiServerMCPClient({
184
  "browser": {
185
  "command": "npx",
 
189
  })
190
  logger.info("✅ MCP client created")
191
 
192
+ # Start persistent session (run async operation in sync context)
193
  self.session_context = self.client.session("browser")
194
+ self.session = loop.run_until_complete(self.session_context.__aenter__())
195
  logger.info("✅ MCP session opened")
196
 
197
+ # Load tools (async operation)
198
+ tools = loop.run_until_complete(load_mcp_tools(self.session))
199
  tools.append(SleepTool(description="Wait 2 seconds between two calls"))
200
  logger.info(f"📥 Loaded {len(tools)} tools")
201
  self.available_tools = {t.name: t for t in tools}
 
204
  install_tool = self.available_tools.get("browser_install")
205
  if install_tool:
206
  try:
207
+ result = loop.run_until_complete(install_tool.arun({}))
208
  logger.info(f"📥 Browser install: {result}")
209
  except Exception as e:
210
  logger.warning(f"⚠️ Browser install failed: {e}, continuing.")
211
 
212
  # System prompt
213
+ self.system_prompt = self.get_system_prompt_with_tools()
214
 
215
  # Create agent
216
  prompt = ChatPromptTemplate.from_messages([
 
241
 
242
  except Exception as e:
243
  logger.error(f"❌ Initialization failed: {e}")
244
+ self.cleanup()
245
  raise
246
 
247
+ def process_query(self, query: str, chat_history: List[Tuple[str, str]]) -> str:
248
  if not self.initialized:
249
  return "❌ Agent not initialized. Please restart the application."
250
 
 
267
  optimized_pairs = len(optimized_history)
268
  logger.info(f"💰 Token optimization: {original_pairs} → {optimized_pairs} history pairs")
269
 
270
+ # Execute with optimized history (run async operation in sync context)
271
+ loop = asyncio.get_event_loop()
272
+ resp = loop.run_until_complete(self.agent_executor.ainvoke({
273
  "input": enhanced_query,
274
  "chat_history": history_messages
275
+ }))
276
 
277
  # Update session context with this interaction
278
  self.conversation_manager.update_session_context(
 
286
  logger.error(f"Error processing query: {e}")
287
  return f"❌ Error: {e}\n💡 Ask for a screenshot to diagnose."
288
 
289
+ def cleanup(self):
290
  """Cleanup resources properly"""
291
  try:
292
  if self.session_context:
293
+ loop = asyncio.get_event_loop()
294
+ loop.run_until_complete(self.session_context.__aexit__(None, None, None))
295
  logger.info("✅ MCP session closed")
296
  self.session_context = None
297
  self.session = None
298
 
299
  if self.client:
300
+ loop = asyncio.get_event_loop()
301
+ loop.run_until_complete(self.client.close())
302
  logger.info("✅ MCP client closed")
303
  self.client = None
304
 
 
331
 
332
  # Global agent instance
333
  agent: Optional[BrowserAgent] = None
 
334
 
335
+ def initialize_agent(api_key: str) -> str:
336
+ """Initialize the agent"""
337
  global agent
338
 
339
  if not api_key.strip():
 
342
  try:
343
  # Cleanup existing agent
344
  if agent:
345
+ agent.cleanup()
346
 
347
  # Create new agent
348
  agent = BrowserAgent(api_key)
349
+ agent.initialize()
350
 
351
+ info = agent.get_system_prompt_with_tools()
352
  return f"✅ Agent Initialized Successfully with Token Optimization!\n\n{info[:1000]}..."
353
 
354
  except Exception as e:
355
  logger.error(f"Initialization error: {e}")
356
  return f"❌ Failed to initialize agent: {e}"
357
 
358
+ def process_message(message: str, history: List[List[str]]) -> List[List[str]]:
359
+ """Process message and return updated history"""
360
  global agent
361
 
362
  if not agent or not agent.initialized:
363
  error_msg = "❌ Agent not initialized. Please initialize first with your API key."
364
  history.append([message, error_msg])
365
+ return history
366
 
367
  if not message.strip():
368
  error_msg = "Please enter a message"
369
  history.append([message, error_msg])
370
+ return history
371
 
372
  try:
373
  # Convert history format for the agent
 
377
  stats = agent.get_token_usage_stats(agent_history)
378
 
379
  # Process the query with optimized history
380
+ response = agent.process_query(message, agent_history)
381
 
382
  # Add token savings info to response if significant savings
383
  if stats["savings_percentage"] > 50:
 
386
  # Add to history
387
  history.append([message, response])
388
 
389
+ return history
390
 
391
  except Exception as e:
392
  logger.error(f"Message processing error: {e}")
393
  error_msg = f"❌ Error: {e}\n💡 Try asking for a screenshot to diagnose."
394
  history.append([message, error_msg])
395
+ return history
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
396
 
397
+ def get_token_stats(history: List[List[str]]) -> str:
398
  """Get token usage statistics"""
399
  global agent
400
  if not agent or not agent.initialized:
 
412
  • Estimated tokens saved: {stats['estimated_tokens_saved']:,}
413
  • Savings percentage: {stats['savings_percentage']:.1f}%"""
414
 
415
+ def screenshot_quick(history: List[List[str]]) -> List[List[str]]:
416
+ """Quick screenshot function"""
417
+ return process_message("Take a screenshot of the current page", history)
418
+
419
  def create_interface():
420
  """Create the Gradio interface"""
421
 
 
501
 
502
  # Event handlers
503
  init_button.click(
504
+ fn=initialize_agent,
505
  inputs=[api_key_input],
506
  outputs=[status_output]
507
  )
508
 
509
  send_button.click(
510
+ fn=process_message,
511
  inputs=[message_input, chatbot],
512
+ outputs=[chatbot]
513
+ ).then(
514
+ fn=lambda: "",
515
+ outputs=[message_input]
516
  )
517
 
518
  message_input.submit(
519
+ fn=process_message,
520
  inputs=[message_input, chatbot],
521
+ outputs=[chatbot]
522
+ ).then(
523
+ fn=lambda: "",
524
+ outputs=[message_input]
525
  )
526
 
527
  clear_button.click(
 
530
  )
531
 
532
  screenshot_button.click(
533
+ fn=screenshot_quick,
534
  inputs=[chatbot],
535
+ outputs=[chatbot]
536
  )
537
 
538
  token_stats_button.click(
539
+ fn=get_token_stats,
540
  inputs=[chatbot],
541
  outputs=[token_stats_output]
542
  )
 
582
 
583
  return interface
584
 
585
+ def cleanup_agent():
586
  """Cleanup agent resources"""
587
  global agent
588
  if agent:
589
+ agent.cleanup()
590
  logger.info("🧹 Agent cleaned up")
591
 
592
  def signal_handler(signum, frame):
593
  """Handle shutdown signals"""
594
  logger.info(f"📡 Received signal {signum}, cleaning up...")
595
+ cleanup_agent()
 
 
596
  sys.exit(0)
597
 
598
+ def main():
599
+ """Main function to run everything"""
 
600
 
601
  # Set up signal handlers
602
  signal.signal(signal.SIGINT, signal_handler)
603
  signal.signal(signal.SIGTERM, signal_handler)
604
 
 
 
 
605
  try:
606
  logger.info("🚀 Starting MCP Browser Agent Application with Token Optimization...")
607
 
 
619
  except Exception as e:
620
  logger.error(f"Application error: {e}")
621
  finally:
622
+ cleanup_agent()
623
 
624
  if __name__ == "__main__":
625
  try:
626
+ main()
627
  except KeyboardInterrupt:
628
  logger.info("🛑 Application stopped by user")
629
  except Exception as e: