Zelyanoth committed on
Commit
fcb0cf1
·
verified ·
1 Parent(s): 85e594a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +133 -577
app.py CHANGED
@@ -24,41 +24,36 @@ except ImportError as e:
24
  logger.error(f"Import error: {e}")
25
  raise
26
 
 
 
 
 
 
 
 
 
 
27
  class ConversationManager:
28
- """Manages conversation history with token optimization"""
29
-
30
  def __init__(self, max_history_pairs: int = 3, max_context_chars: int = 2000):
31
  self.max_history_pairs = max_history_pairs
32
  self.max_context_chars = max_context_chars
33
- self.session_context = {} # Browser state context
34
-
35
  def update_session_context(self, action: str, result: str):
36
- """Update browser session context (current page, last actions, etc.)"""
37
  self.session_context.update({
38
  'last_action': action,
39
- 'last_result': result[:500], # Truncate long results
40
  'timestamp': datetime.now().isoformat()
41
  })
42
-
43
  def get_optimized_history(self, full_history: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
44
- """Get optimized history with recent messages + session context"""
45
-
46
- # Take only the last N conversation pairs
47
- recent_history = full_history[-self.max_history_pairs:] if full_history else []
48
-
49
- # Add session context as first "message" if we have browser state
50
  if self.session_context:
51
- context_msg = f"[SESSION_CONTEXT] Browser session active. Last action: {self.session_context.get('last_action', 'none')}"
52
- recent_history.insert(0, ("system", context_msg))
53
-
54
- return recent_history
55
-
56
  def get_context_summary(self) -> str:
57
- """Get a summary of current browser session state"""
58
  if not self.session_context:
59
  return "Browser session not active."
60
-
61
- return f"Browser session active. Last action: {self.session_context.get('last_action', 'none')} at {self.session_context.get('timestamp', 'unknown')}"
62
 
63
  class BrowserAgent:
64
  def __init__(self, api_key: str):
@@ -71,603 +66,164 @@ class BrowserAgent:
71
  self.initialized = False
72
  self.available_tools = {}
73
  self.system_prompt = ""
74
-
75
- # Add conversation manager for token optimization
76
- self.conversation_manager = ConversationManager(
77
- max_history_pairs=3, # Only keep last 3 exchanges
78
- max_context_chars=2000 # Limit context size
79
- )
80
 
81
  async def generate_tools_prompt(self):
82
- """Generate a detailed prompt section about available tools"""
83
- try:
84
- tools_prompt = "\n## 🛠️ AVAILABLE TOOLS\n"
85
- tools_prompt += "You have access to the following browser automation tools via MCP:\n\n"
86
-
87
- for tool_name, tool_info in self.available_tools.items():
88
- tools_prompt += f"### {tool_name}\n"
89
-
90
- # Add description from StructuredTool object
91
- description = getattr(tool_info, 'description', 'No description available')
92
- tools_prompt += f"**Description**: {description}\n"
93
-
94
- # Add parameters from args_schema if available
95
- if hasattr(tool_info, 'args_schema') and tool_info.args_schema:
96
- try:
97
- schema = tool_info.args_schema.model_json_schema()
98
- if 'properties' in schema:
99
- tools_prompt += "**Parameters**:\n"
100
- for param_name, param_info in schema['properties'].items():
101
- param_type = param_info.get('type', 'unknown')
102
- param_desc = param_info.get('description', 'No description')
103
- required = param_name in schema.get('required', [])
104
- required_mark = " (required)" if required else " (optional)"
105
- tools_prompt += f"- `{param_name}` ({param_type}){required_mark}: {param_desc}\n"
106
- except Exception as schema_error:
107
- logger.debug(f"Could not parse schema for {tool_name}: {schema_error}")
108
- tools_prompt += "**Usage**: Call this tool when you need to perform this browser action\n"
109
- else:
110
- tools_prompt += "**Usage**: Call this tool when you need to perform this browser action\n"
111
-
112
- tools_prompt += "\n"
113
-
114
- tools_prompt += """
115
- 🎯 Multi‑Step Workflow
116
-
117
- Navigate & Snapshot
118
-
119
- Load the target page
120
-
121
- Capture a snapshot
122
-
123
- Assess if further steps are needed—if so, proceed to the next action
124
-
125
- Perform Action & Validate
126
-
127
- if needed closes add or popups
128
-
129
- Capture a snapshot
130
-
131
- Verify results before moving on
132
-
133
- Keep Browser Open
134
-
135
- Never close the session unless explicitly instructed
136
-
137
- Avoid Redundancy
138
-
139
- Don’t repeat actions (e.g., clicking) when data is already collected
140
-
141
- ## 🚨 SESSION PERSISTENCE RULES
142
- - Browser stays open for the entire conversation
143
- - Each action builds on previous state
144
- - Context is maintained between requests
145
- """
146
- return tools_prompt
147
- except Exception as e:
148
- logger.error(f"Failed to generate tools prompt: {e}")
149
- return "\n## 🛠️ TOOLS\nBrowser automation tools available but not detailed.\n"
150
 
151
  async def get_system_prompt_with_tools(self):
152
  base = """🌐 Browser Agent — Persistent Session & Optimized Memory
153
- You are an intelligent browser automation agent (Playwright via MCP) tasked with keeping a lightweight, ongoing session:
154
-
155
- 🎯 Mission
156
-
157
- Navigate pages, extract and analyze data without closing the browser
158
-
159
- Handle pop‑ups and capture snapshots to validate each step
160
-
161
- 🔄 Session Management
162
-
163
- Browser remains open across user requests
164
-
165
- Only recent chat history is provided to save tokens
166
-
167
- Session context (current page, recent actions) is maintained separately
168
-
169
- ⚡ Response Structure
170
- For each action:
171
-
172
- State → tool call
173
-
174
- Snapshot → confirmation
175
-
176
- Next plan (if needed)
177
-
178
- 💡 Best Practices
179
-
180
- Use text selectors and wait for content
181
-
182
- Pause 2 s between tool calls
183
-
184
- Be concise and focused on the current task it s important as soon as you have the information you came for return it
185
-
186
- If earlier context is needed, ask the user to clarify.
187
-
188
  """
189
  tools_section = await self.generate_tools_prompt()
190
  return base + tools_section
191
 
192
- async def initialize(self):
193
- """Initialize MCP client, model, session and agent"""
194
- try:
195
- logger.info("🚀 Initializing Browser Agent...")
196
-
197
- # LLM
198
- mistral_key = os.getenv("mistralkey")
199
- if not mistral_key:
200
- raise ValueError("Mistral API key is required")
201
-
202
- self.model = ChatMistralAI(
203
- model="mistral-small-latest",
204
- api_key=mistral_key,
205
-
206
- )
207
- logger.info(" Mistral LLM initialized with optimized settings")
208
-
209
- # MCP client
210
- self.client = MultiServerMCPClient({
211
- "browser": {
212
- "command": "npx",
213
- "args": ["@playwright/mcp@latest", "--browser", "chromium","--user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"],
214
- "transport": "stdio"
215
- }
216
- })
217
- logger.info(" MCP client created")
218
-
219
- # Start persistent session
220
- self.session_context = self.client.session("browser")
221
- self.session = await self.session_context.__aenter__()
222
- logger.info("✅ MCP session opened")
223
-
224
- # Load tools
225
- tools = await load_mcp_tools(self.session)
226
- tools.append(SleepTool(description="Wait 4 seconds between two calls"))
227
- logger.info(f"📥 Loaded {len(tools)} tools")
228
- self.available_tools = {t.name: t for t in tools}
229
-
230
- # Install browser if needed
231
- install_tool = self.available_tools.get("browser_install")
232
- if install_tool:
233
- try:
234
- result = await install_tool.arun({})
235
- logger.info(f"📥 Browser install: {result}")
236
- except Exception as e:
237
- logger.warning(f"⚠️ Browser install failed: {e}, continuing.")
238
-
239
- # System prompt
240
- self.system_prompt = await self.get_system_prompt_with_tools()
241
-
242
- # Create agent
243
- prompt = ChatPromptTemplate.from_messages([
244
- ("system", self.system_prompt),
245
- MessagesPlaceholder(variable_name="chat_history"),
246
- ("human", "{input}"),
247
- MessagesPlaceholder(variable_name="agent_scratchpad"),
248
- ])
249
- agent = create_tool_calling_agent(
250
- llm=self.model,
251
- tools=tools,
252
- prompt=prompt
253
- )
254
- self.agent_executor = AgentExecutor(
255
- agent=agent,
256
- tools=tools,
257
- verbose=True,
258
- max_iterations=15, # Reduced from 30
259
- early_stopping_method="generate",
260
- handle_parsing_errors=True,
261
- return_intermediate_steps=True,
262
- max_execution_time=180 # Reduced from 300
263
- )
264
-
265
- self.initialized = True
266
- logger.info("✅ Agent initialized with persistent session and optimized memory")
267
- return True
268
-
269
- except Exception as e:
270
- logger.error(f"❌ Initialization failed: {e}")
271
- await self.cleanup()
272
- raise
273
-
274
- async def process_query(self, query: str, chat_history: List[Tuple[str, str]]) -> str:
275
- if not self.initialized:
276
- return "❌ Agent not initialized. Please restart the application."
277
-
278
- try:
279
- # ✅ KEY OPTIMIZATION: Use only recent history instead of full history
280
- optimized_history = self.conversation_manager.get_optimized_history(chat_history)
281
-
282
- # Convert to message format
283
- history_messages = []
284
- for human, ai in optimized_history:
285
- if human: history_messages.append(("human", human))
286
- if ai: history_messages.append(("ai", ai))
287
-
288
- # Add session context
289
- context_summary = self.conversation_manager.get_context_summary()
290
- enhanced_query = f"{query}\n\n[SESSION_INFO]: {context_summary}"
291
-
292
- # Log token savings
293
- original_pairs = len(chat_history)
294
- optimized_pairs = len(optimized_history)
295
- logger.info(f"💰 Token optimization: {original_pairs} → {optimized_pairs} history pairs")
296
-
297
- # Execute with optimized history
298
- resp = await self.agent_executor.ainvoke({
299
- "input": enhanced_query,
300
- "chat_history": history_messages
301
- })
302
-
303
- # Update session context with this interaction
304
- self.conversation_manager.update_session_context(
305
- action=query,
306
- result=resp["output"]
307
- )
308
-
309
- return resp["output"]
310
-
311
- except Exception as e:
312
- logger.error(f"Error processing query: {e}")
313
- return f"❌ Error: {e}\n💡 Ask for a screenshot to diagnose."
314
 
315
- async def cleanup(self):
316
- """Cleanup resources properly"""
317
- try:
318
- if self.session_context:
319
- await self.session_context.__aexit__(None, None, None)
320
- logger.info("✅ MCP session closed")
321
- self.session_context = None
322
- self.session = None
323
-
324
- if self.client:
325
- await self.client.close()
326
- logger.info("✅ MCP client closed")
327
- self.client = None
328
-
329
- self.initialized = False
330
-
331
- except Exception as e:
332
- logger.error(f"Cleanup error: {e}")
333
 
334
- def get_token_usage_stats(self, full_history: List[Tuple[str, str]]) -> Dict[str, Any]:
335
- """Get statistics about token usage optimization"""
336
- original_pairs = len(full_history)
337
- optimized_pairs = len(self.conversation_manager.get_optimized_history(full_history))
338
-
339
- # Rough token estimation (1 token ≈ 4 characters)
340
- def estimate_tokens(text: str) -> int:
341
- return len(text) // 4
342
-
343
- original_tokens = sum(estimate_tokens(msg[0] + msg[1]) for msg in full_history)
344
- optimized_tokens = sum(estimate_tokens(msg[0] + msg[1]) for msg in self.conversation_manager.get_optimized_history(full_history))
345
-
346
- return {
347
- "original_pairs": original_pairs,
348
- "optimized_pairs": optimized_pairs,
349
- "pairs_saved": original_pairs - optimized_pairs,
350
- "estimated_original_tokens": original_tokens,
351
- "estimated_optimized_tokens": optimized_tokens,
352
- "estimated_tokens_saved": original_tokens - optimized_tokens,
353
- "savings_percentage": ((original_tokens - optimized_tokens) / original_tokens * 100) if original_tokens > 0 else 0
354
- }
355
 
356
- # Global agent instance
357
  agent: Optional[BrowserAgent] = None
358
- event_loop: Optional[asyncio.AbstractEventLoop] = None
359
 
360
- async def initialize_agent_async(api_key: str) -> str:
361
- """Initialize the agent asynchronously"""
362
  global agent
363
-
364
  if not api_key.strip():
365
- return "❌ Please provide a Mistral API key"
366
-
367
  try:
368
- # Cleanup existing agent
369
- if agent:
370
- await agent.cleanup()
371
-
372
- # Create new agent
373
  agent = BrowserAgent(api_key)
374
- await agent.initialize()
375
-
376
- info = await agent.get_system_prompt_with_tools()
377
- return f"✅ Agent Initialized Successfully with Token Optimization!\n\n{info[:1000]}..."
378
-
379
  except Exception as e:
380
- logger.error(f"Initialization error: {e}")
381
- return f"❌ Failed to initialize agent: {e}"
382
 
383
- async def process_message_async(message: str, history: List[List[str]]) -> Tuple[str, List[List[str]]]:
384
- """Process message asynchronously with token optimization"""
385
  global agent
386
-
387
  if not agent or not agent.initialized:
388
- error_msg = "❌ Agent not initialized. Please initialize first with your API key."
389
- history.append([message, error_msg])
390
  return "", history
391
-
392
  if not message.strip():
393
- error_msg = "Please enter a message"
394
- history.append([message, error_msg])
395
  return "", history
396
-
 
397
  try:
398
- # Convert history format for the agent
399
- agent_history = [(msg[0], msg[1]) for msg in history]
400
-
401
- # Get token usage stats before processing
402
- stats = agent.get_token_usage_stats(agent_history)
403
-
404
- # Process the query with optimized history
405
- response = await agent.process_query(message, agent_history)
406
-
407
- # Add token savings info to response if significant savings
408
- if stats["savings_percentage"] > 50:
409
- response += f"\n\n💰 Token savings: {stats['savings_percentage']:.1f}% ({stats['estimated_tokens_saved']} tokens saved)"
410
-
411
- # Add to history
412
- history.append([message, response])
413
-
414
  return "", history
415
-
416
  except Exception as e:
417
- logger.error(f"Message processing error: {e}")
418
- error_msg = f"❌ Error: {e}\n💡 Try asking for a screenshot to diagnose."
419
- history.append([message, error_msg])
420
  return "", history
421
 
422
- def run_in_event_loop(coro):
423
- """Run coroutine in the event loop"""
424
- global event_loop
425
- if event_loop and not event_loop.is_closed():
426
- return asyncio.run_coroutine_threadsafe(coro, event_loop).result()
427
- else:
428
- return asyncio.run(coro)
429
-
430
- # Sync wrappers for Gradio
431
- def initialize_agent_sync(api_key: str) -> str:
432
- """Sync wrapper for agent initialization"""
433
- return run_in_event_loop(initialize_agent_async(api_key))
434
-
435
- def process_message_sync(message: str, history: List[List[str]]) -> Tuple[str, List[List[str]]]:
436
- """Sync wrapper for message processing"""
437
- return run_in_event_loop(process_message_async(message, history))
438
-
439
  def get_token_stats_sync(history: List[List[str]]) -> str:
440
- """Get token usage statistics"""
441
  global agent
442
  if not agent or not agent.initialized:
443
- return "Agent not initialized"
444
-
445
- agent_history = [(msg[0], msg[1]) for msg in history]
446
- stats = agent.get_token_usage_stats(agent_history)
447
-
448
- return f"""📊 Token Usage Statistics:
449
- • Original conversation pairs: {stats['original_pairs']}
450
- • Optimized conversation pairs: {stats['optimized_pairs']}
451
- • Pairs saved: {stats['pairs_saved']}
452
- • Estimated original tokens: {stats['estimated_original_tokens']:,}
453
- • Estimated optimized tokens: {stats['estimated_optimized_tokens']:,}
454
- • Estimated tokens saved: {stats['estimated_tokens_saved']:,}
455
- • Savings percentage: {stats['savings_percentage']:.1f}%"""
456
 
457
  def create_interface():
458
- """Create the Gradio interface"""
459
-
460
- with gr.Blocks(
461
- title="MCP Browser Agent - Token Optimized",
462
- theme=gr.themes.Soft(),
463
- css="""
464
- .container { max-width: 1200px; margin: auto; }
465
- .header { text-align: center; margin-bottom: 2rem; }
466
- .status-box { padding: 1rem; border-radius: 8px; margin: 1rem 0; }
467
- .token-stats { background: #f0f8ff; padding: 1rem; border-radius: 8px; }
468
- """
469
- ) as interface:
470
-
471
- gr.HTML("""
472
- <div class="header">
473
- <h1>🌐 MCP Browser Agent - Token Optimized</h1>
474
- <p>AI-powered web browsing with persistent sessions and optimized token usage</p>
475
- </div>
476
- """)
477
-
478
- with gr.Row():
479
- with gr.Column(scale=1):
480
- gr.Markdown("### 🔧 Configuration")
481
- api_key_input = gr.Textbox(
482
- label="Mistral API Key",
483
- placeholder="Enter your Mistral API key...",
484
- type="password",
485
- lines=1
486
- )
487
-
488
- init_button = gr.Button("Initialize Agent", variant="primary")
489
- status_output = gr.Textbox(
490
- label="Status & Available Tools",
491
- interactive=False,
492
- lines=6
493
- )
494
-
495
- gr.Markdown("### 💰 Token Optimization")
496
- token_stats_button = gr.Button("Show Token Stats", variant="secondary")
497
- token_stats_output = gr.Textbox(
498
- label="Token Usage Statistics",
499
- interactive=False,
500
- lines=8
501
- )
502
-
503
- gr.Markdown("""
504
- ### 📝 Optimized Usage Tips
505
- **Token Savings Features:**
506
- - Only last 3 conversation pairs sent to API
507
- - Session context maintained separately
508
- - Reduced max tokens per response
509
- - Smart context summarization
510
-
511
- **Best Practices:**
512
- - Be specific in your requests
513
- - Use "take screenshot" to check current state
514
- - Ask for "browser status" if you need context
515
- - Long conversations automatically optimized
516
- """)
517
-
518
- with gr.Column(scale=2):
519
- gr.Markdown("### 💬 Chat with Browser Agent")
520
-
521
- chatbot = gr.Chatbot(
522
- label="Conversation",
523
- height=500,
524
- show_copy_button=True
525
- )
526
-
527
- with gr.Row():
528
- message_input = gr.Textbox(
529
- label="Message",
530
- placeholder="Enter your browsing request...",
531
- lines=2,
532
- scale=4
533
- )
534
- send_button = gr.Button("Send", variant="primary", scale=1)
535
-
536
- with gr.Row():
537
- clear_button = gr.Button("Clear Chat", variant="secondary")
538
- screenshot_button = gr.Button("Quick Screenshot", variant="secondary")
539
-
540
- # Event handlers
541
- init_button.click(
542
- fn=initialize_agent_sync,
543
- inputs=[api_key_input],
544
- outputs=[status_output]
545
- )
546
-
547
- send_button.click(
548
- fn=process_message_sync,
549
- inputs=[message_input, chatbot],
550
- outputs=[message_input, chatbot]
551
- )
552
-
553
- message_input.submit(
554
- fn=process_message_sync,
555
- inputs=[message_input, chatbot],
556
- outputs=[message_input, chatbot]
557
- )
558
-
559
- clear_button.click(
560
- fn=lambda: [],
561
- outputs=[chatbot]
562
- )
563
-
564
- screenshot_button.click(
565
- fn=lambda history: process_message_sync("Take a screenshot of the current page", history),
566
- inputs=[chatbot],
567
- outputs=[message_input, chatbot]
568
- )
569
-
570
- token_stats_button.click(
571
- fn=get_token_stats_sync,
572
- inputs=[chatbot],
573
- outputs=[token_stats_output]
574
- )
575
-
576
- # Add helpful information
577
- with gr.Accordion("ℹ️ Token Optimization Guide", open=False):
578
- gr.Markdown("""
579
- ## 💰 How Token Optimization Works
580
-
581
- **The Problem with Original Code:**
582
- - Every API call sent complete conversation history
583
- - Token usage grew exponentially with conversation length
584
- - Costs could explode for long sessions
585
-
586
- **Our Optimization Solutions:**
587
-
588
- 1. **Limited History Window**: Only last 3 conversation pairs sent to API
589
- 2. **Session Context**: Browser state maintained separately from chat history
590
- 3. **Smart Summarization**: Key session info added to each request
591
- 4. **Reduced Limits**: Lower max_tokens and max_iterations
592
- 5. **Token Tracking**: Real-time savings statistics
593
-
594
- **Token Savings Example:**
595
- ```
596
- Original: 10 messages = 5,000 tokens per API call
597
- Optimized: 10 messages = 500 tokens per API call
598
- Savings: 90% reduction in token usage!
599
- ```
600
-
601
- **What This Means:**
602
- - ✅ Persistent browser sessions still work
603
- - ✅ 90%+ reduction in API costs
604
- - ✅ Faster response times
605
- - ✅ Better performance for long conversations
606
- - ⚠️ Agent has limited memory of old messages
607
-
608
- **If Agent Needs Earlier Context:**
609
- - Use "browser status" to check current state
610
- - Take screenshots to show current page
611
- - Re-explain context if needed
612
- - Clear chat periodically for fresh start
613
- """)
614
-
615
- return interface
616
 
617
- async def cleanup_agent():
618
- """Cleanup agent resources"""
619
- global agent
620
- if agent:
621
- await agent.cleanup()
622
- logger.info("🧹 Agent cleaned up")
623
 
624
  def signal_handler(signum, frame):
625
- """Handle shutdown signals"""
626
- logger.info(f"📡 Received signal {signum}, cleaning up...")
627
- global event_loop
628
- if event_loop and not event_loop.is_closed():
629
- event_loop.create_task(cleanup_agent())
630
  sys.exit(0)
631
 
632
- async def main():
633
- """Main async function to run everything"""
634
- global event_loop
635
-
636
- # Set up signal handlers
637
  signal.signal(signal.SIGINT, signal_handler)
638
  signal.signal(signal.SIGTERM, signal_handler)
639
-
640
- # Get the current event loop
641
- event_loop = asyncio.get_event_loop()
642
-
643
- try:
644
- logger.info("🚀 Starting MCP Browser Agent Application with Token Optimization...")
645
-
646
- # Create and launch interface
647
- interface = create_interface()
648
-
649
- # Launch interface (this will block)
650
- await asyncio.to_thread(
651
- interface.launch,
652
- server_name="0.0.0.0",
653
- server_port=7860,
654
- share=False,
655
- debug=False,
656
- show_error=True,
657
- quiet=False
658
- )
659
-
660
- except Exception as e:
661
- logger.error(f"Application error: {e}")
662
- finally:
663
- await cleanup_agent()
664
 
665
  if __name__ == "__main__":
666
- try:
667
- asyncio.run(main())
668
- except KeyboardInterrupt:
669
- logger.info("🛑 Application stopped by user")
670
- except Exception as e:
671
- logger.error(f"Fatal error: {e}")
672
- finally:
673
- logger.info("👋 Application shutdown complete")
 
24
  logger.error(f"Import error: {e}")
25
  raise
26
 
27
# Helper for calling a coroutine from synchronous code (Gradio callbacks,
# signal handlers).
_SYNC_RUN_STATE = {}


def _get_sync_loop():
    """Return the shared background event loop, starting it on first use."""
    import threading

    # dict.setdefault is atomic, so every caller ends up with the same lock.
    lock = _SYNC_RUN_STATE.setdefault("lock", threading.Lock())
    with lock:
        loop = _SYNC_RUN_STATE.get("loop")
        if loop is None or loop.is_closed():
            loop = asyncio.new_event_loop()
            worker = threading.Thread(
                target=loop.run_forever,
                name="sync-run-loop",
                daemon=True,  # must never keep the process alive on exit
            )
            worker.start()
            _SYNC_RUN_STATE["loop"] = loop
            _SYNC_RUN_STATE["thread"] = worker
        return loop


def sync_run(coro):
    """Run ``coro`` to completion from synchronous code and return its result.

    Every coroutine is executed on one long-lived event loop that runs in a
    daemon thread. Two reasons:

    * ``loop.run_until_complete()`` cannot be used on a loop that is already
      running, and ``asyncio.run()`` refuses to start inside a running loop,
      so neither works when the caller itself sits on top of an event loop.
    * ``asyncio.run()`` creates and closes a new loop on every call, so
      objects bound to the loop of an earlier call cannot be used by a later
      one. A single shared loop keeps them usable across calls.

    Args:
        coro: The coroutine object to execute.

    Returns:
        Whatever the coroutine returns.

    Raises:
        RuntimeError: If called from a coroutine that is itself running on
            the shared background loop (blocking there would deadlock).
        Exception: Any exception raised by the coroutine is re-raised.
    """
    loop = _get_sync_loop()
    try:
        current = asyncio.get_running_loop()
    except RuntimeError:
        current = None
    if current is loop:
        coro.close()  # avoid a "coroutine was never awaited" warning
        raise RuntimeError("sync_run() cannot be called from its own event loop")
    return asyncio.run_coroutine_threadsafe(coro, loop).result()
34
+
35
# Trims the chat history sent to the model and remembers the last action.
class ConversationManager:
    """Keep a short history window plus a small record of the last action.

    Attributes are plain instance fields:
      * ``max_history_pairs`` - number of most recent (human, ai) pairs kept.
      * ``max_context_chars`` - stored for callers; not used by this class.
      * ``session_context``   - dict with ``last_action``, ``last_result`` and
        ``timestamp`` once :meth:`update_session_context` has been called.
    """

    def __init__(self, max_history_pairs: int = 3, max_context_chars: int = 2000):
        self.max_history_pairs = max_history_pairs
        self.max_context_chars = max_context_chars
        self.session_context = {}

    def update_session_context(self, action: str, result: str):
        """Record the latest action and (at most 500 characters of) its result."""
        # A non-string result (e.g. None) must not break the truncation below.
        if result is None:
            result_text = ""
        elif isinstance(result, str):
            result_text = result
        else:
            result_text = str(result)
        self.session_context.update({
            'last_action': action,
            'last_result': result_text[:500],
            'timestamp': datetime.now().isoformat()
        })

    def get_optimized_history(self, full_history: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
        """Return the last ``max_history_pairs`` pairs, preceded by a context entry.

        The input list is never modified. When a session context has been
        recorded, a ``("system", "[SESSION_CONTEXT] ...")`` entry is placed
        first in the returned list.
        """
        window = self.max_history_pairs
        # seq[-0:] is the *whole* sequence, so a zero or negative window has
        # to be handled explicitly to really mean "keep nothing".
        if full_history and window > 0:
            recent = list(full_history[-window:])
        else:
            recent = []
        if self.session_context:
            msg = f"[SESSION_CONTEXT] Last action: {self.session_context.get('last_action','none')}"
            recent.insert(0, ("system", msg))
        return recent

    def get_context_summary(self) -> str:
        """Return a one-line description of the recorded session state."""
        if not self.session_context:
            return "Browser session not active."
        return f"Browser session active. Last action: {self.session_context.get('last_action')} at {self.session_context.get('timestamp')}"
 
57
 
58
  class BrowserAgent:
59
  def __init__(self, api_key: str):
 
66
  self.initialized = False
67
  self.available_tools = {}
68
  self.system_prompt = ""
69
+ self.conversation_manager = ConversationManager()
 
 
 
 
 
70
 
71
  async def generate_tools_prompt(self):
72
+ # identique à l’actuel
73
+ # …
74
+ return tools_prompt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
  async def get_system_prompt_with_tools(self):
77
  base = """🌐 Browser Agent — Persistent Session & Optimized Memory
78
+ You are an intelligent browser automation agent (Playwright via MCP)...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  """
80
  tools_section = await self.generate_tools_prompt()
81
  return base + tools_section
82
 
83
async def initialize_async(self):
    """Create the model, open the MCP browser session and build the agent.

    Steps: resolve the Mistral key, start the Playwright MCP server over
    stdio, load its tools (plus ``SleepTool``), run ``browser_install`` when
    that tool is offered, build the system prompt and the ``AgentExecutor``.

    Returns:
        True once ``self.initialized`` has been set.

    Raises:
        ValueError: If no Mistral API key can be found.
        Exception: Any failure of the later steps is re-raised after a
            best-effort ``cleanup_async()`` so no session is left open.
    """
    # The environment variable keeps priority (unchanged behaviour).
    # NOTE(review): falling back to ``self.api_key`` assumes __init__ stores
    # the constructor argument under that name - confirm; when the attribute
    # does not exist this is a no-op.
    mistral_key = os.getenv("mistralkey") or getattr(self, "api_key", None)
    if not mistral_key:
        raise ValueError("Mistral API key missing")

    try:
        self.model = ChatMistralAI(model="mistral-small-latest", api_key=mistral_key)
        self.client = MultiServerMCPClient({
            "browser": {
                "command": "npx",
                "args": ["@playwright/mcp@latest", "--browser", "chromium"],
                "transport": "stdio"
            }
        })
        self.session_context = self.client.session("browser")
        self.session = await self.session_context.__aenter__()

        tools = await load_mcp_tools(self.session)
        tools.append(SleepTool(description="Wait 4 seconds"))
        self.available_tools = {t.name: t for t in tools}

        install = self.available_tools.get("browser_install")
        if install:
            try:
                await install.arun({})
            except Exception as install_error:
                # Best effort: the browser may already be installed, so keep
                # going - but leave a trace instead of hiding the failure.
                logger.warning(f"Browser install failed, continuing: {install_error}")

        self.system_prompt = await self.get_system_prompt_with_tools()
        # ChatPromptTemplate treats {name} as a template variable; tool
        # descriptions are free text, so literal braces must be escaped.
        template_text = self.system_prompt.replace("{", "{{").replace("}", "}}")
        prompt = ChatPromptTemplate.from_messages([
            ("system", template_text),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{input}"),
            MessagesPlaceholder(variable_name="agent_scratchpad"),
        ])
        agent = create_tool_calling_agent(
            llm=self.model, tools=tools, prompt=prompt
        )
        self.agent_executor = AgentExecutor(
            agent=agent, tools=tools, verbose=True,
            max_iterations=15, early_stopping_method="generate",
            handle_parsing_errors=True, return_intermediate_steps=True,
            max_execution_time=180
        )
    except Exception:
        logger.exception("Browser agent initialization failed")
        try:
            await self.cleanup_async()
        except Exception as cleanup_error:
            logger.warning(f"Cleanup after failed initialization also failed: {cleanup_error}")
        raise

    self.initialized = True
    return True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
async def cleanup_async(self):
    """Close the MCP session and client and mark the agent uninitialized.

    Cleanup is best effort: a failure while closing the session is logged
    and does not prevent the client from being closed, and the instance
    state is reset in every case.
    """
    session_cm = getattr(self, "session_context", None)
    client = getattr(self, "client", None)

    try:
        if session_cm:
            await session_cm.__aexit__(None, None, None)
    except Exception:
        logger.exception("Failed to close MCP session")
    finally:
        self.session_context = None
        self.session = None

    try:
        if client:
            await client.close()
    except Exception:
        logger.exception("Failed to close MCP client")
    finally:
        self.client = None

    self.initialized = False
 
 
 
 
 
 
 
 
 
 
133
 
134
async def process_query_async(self, query: str, chat_history: List[Tuple[str, str]]) -> str:
    """Send ``query`` to the agent with a trimmed history and return its answer.

    The history is reduced by ``self.conversation_manager``, converted to
    ``("human", text)`` / ``("ai", text)`` tuples, and the query is suffixed
    with a ``[SESSION_INFO]`` summary. After the call the query and the
    output are recorded as the new session context.

    Args:
        query: The user's request.
        chat_history: Previous (human, ai) pairs; ``None`` counts as empty.

    Returns:
        The ``"output"`` entry of the agent executor's response.
    """
    manager = self.conversation_manager
    messages = []
    for human_text, ai_text in manager.get_optimized_history(chat_history or []):
        is_context_entry = (
            human_text == "system"
            and isinstance(ai_text, str)
            and ai_text.startswith("[SESSION_CONTEXT]")
        )
        if is_context_entry:
            # This synthetic entry is not a conversation turn. Replaying it
            # would show the model a human message "system" followed by a
            # fabricated AI reply; its content is already carried by the
            # [SESSION_INFO] suffix below.
            continue
        if human_text:
            messages.append(("human", human_text))
        if ai_text:
            messages.append(("ai", ai_text))

    summary = manager.get_context_summary()
    response = await self.agent_executor.ainvoke({
        "input": f"{query}\n\n[SESSION_INFO]: {summary}",
        "chat_history": messages
    })
    output = response["output"]
    manager.update_session_context(query, output)
    return output
 
 
 
 
 
 
149
 
150
+ # Global
151
  agent: Optional[BrowserAgent] = None
 
152
 
153
def initialize_agent_sync(api_key: str) -> str:
    """Create and initialize the global agent; return a status message.

    Any previously initialized agent is cleaned up first. If initialization
    fails, the partially built agent is cleaned up as well and the global is
    reset to ``None`` so no half-open session is kept around.

    Args:
        api_key: Key typed into the UI; empty, blank or ``None`` is rejected.

    Returns:
        A human-readable success or failure message (never raises).
    """
    global agent
    if not (api_key or "").strip():
        return "❌ Clé Mistral requise"
    try:
        if agent and agent.initialized:
            sync_run(agent.cleanup_async())

        candidate = BrowserAgent(api_key)
        try:
            sync_run(candidate.initialize_async())
        except Exception:
            # Release whatever was opened before the failure.
            try:
                sync_run(candidate.cleanup_async())
            except Exception as cleanup_error:
                logger.warning(f"Cleanup after failed initialization failed: {cleanup_error}")
            agent = None
            raise

        agent = candidate
        info = agent.system_prompt[:1000]
        return f"✅ Agent initialisé !\n\n{info}..."
    except Exception as e:
        return f" Échec init. {e}"
 
166
 
167
def process_message_sync(message: str, history: List[List[str]]) -> Tuple[str, List[List[str]]]:
    """Handle one chat message and return ``("", updated_history)``.

    The empty string clears the message box; ``history`` receives one new
    ``[message, reply]`` entry (appended in place), where the reply is the
    agent's answer or an error text.

    Args:
        message: Text entered by the user; ``None`` is treated as empty.
        history: Chat history as [user, bot] pairs; ``None`` is treated as an
            empty history.
    """
    global agent
    if history is None:
        history = []

    def _reply(text):
        history.append([message, text])
        return "", history

    if not agent or not agent.initialized:
        return _reply("❌ Agent non initialisé.")
    if not (message or "").strip():
        return _reply("Veuillez entrer un message.")

    try:
        agent_hist = [(m[0], m[1]) for m in history]
        resp = sync_run(agent.process_query_async(message, agent_hist))
    except Exception as e:
        return _reply(f" Erreur: {e}")
    return _reply(resp)
187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  def get_token_stats_sync(history: List[List[str]]) -> str:
 
189
  global agent
190
  if not agent or not agent.initialized:
191
+ return "Agent non initialisé"
192
+ orig = len(history)
193
+ opt = len(agent.conversation_manager.get_optimized_history([(m[0],m[1]) for m in history]))
194
+ # tests estimés tokens
195
+ return f"📊 Paires: {orig} → {opt}"
 
 
 
 
 
 
 
 
196
 
197
  def create_interface():
198
+ with gr.Blocks(title="MCP Browser Agent", theme=gr.themes.Soft()) as interface:
199
+ gr.Markdown("# 🌐 MCP Browser Agent")
200
+ api_input = gr.Textbox(label="Clé Mistral", type="password")
201
+ btn_init = gr.Button("Initialiser")
202
+ out_init = gr.Textbox(label="Statut", interactive=False)
203
+ btn_init.click(fn=initialize_agent_sync, inputs=[api_input], outputs=[out_init])
204
+
205
+ chatbot = gr.Chatbot(label="Conversation")
206
+ msg_input = gr.Textbox(placeholder="Écris ton message...", lines=2)
207
+ btn_send = gr.Button("Envoyer")
208
+ btn_send.click(fn=process_message_sync, inputs=[msg_input, chatbot], outputs=[msg_input, chatbot])
209
+ msg_input.submit(fn=process_message_sync, inputs=[msg_input, chatbot], outputs=[msg_input, chatbot])
210
+
211
+ btn_stats = gr.Button("Stats tokens")
212
+ out_stats = gr.Textbox(label="Token Stats", interactive=False)
213
+ btn_stats.click(fn=get_token_stats_sync, inputs=[chatbot], outputs=[out_stats])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
 
215
+ return interface
 
 
 
 
 
216
 
217
  def signal_handler(signum, frame):
218
+ if agent and agent.initialized:
219
+ sync_run(agent.cleanup_async())
 
 
 
220
  sys.exit(0)
221
 
222
def main():
    """Install the shutdown handlers, build the UI and serve it on port 7860."""
    for shutdown_signal in (signal.SIGINT, signal.SIGTERM):
        signal.signal(shutdown_signal, signal_handler)

    app = create_interface()
    app.launch(server_name="0.0.0.0", server_port=7860)


if __name__ == "__main__":
    main()