Zelyanoth commited on
Commit
85e594a
Β·
verified Β·
1 Parent(s): e5cf42a

Upload 5 files

Browse files
Files changed (5) hide show
  1. Dockerfile +69 -0
  2. README.md +5 -3
  3. app.py +673 -0
  4. package.json +7 -0
  5. requirements.txt +20 -0
Dockerfile ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use the official Playwright image as base which includes browsers and dependencies
2
+ FROM mcr.microsoft.com/playwright:v1.48.0-jammy
3
+
4
+ # Set timezone and prevent interactive prompts
5
+ ENV DEBIAN_FRONTEND=noninteractive
6
+ ENV TZ=UTC
7
+
8
+ # Install Python 3.11 and required packages
9
+ RUN apt-get update && apt-get install -y \
10
+ software-properties-common \
11
+ curl \
12
+ tzdata \
13
+ && ln -fs /usr/share/zoneinfo/$TZ /etc/localtime \
14
+ && echo $TZ > /etc/timezone \
15
+ && dpkg-reconfigure --frontend noninteractive tzdata \
16
+ && add-apt-repository ppa:deadsnakes/ppa \
17
+ && apt-get update \
18
+ && apt-get install -y \
19
+ python3.11 \
20
+ python3.11-venv \
21
+ python3.11-distutils \
22
+ python3.11-dev \
23
+ && rm -rf /var/lib/apt/lists/*
24
+
25
+ # Install pip for Python 3.11
26
+ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
27
+
28
+ # Create symlinks for python and pip commands
29
+ RUN ln -sf /usr/bin/python3.11 /usr/bin/python \
30
+ && ln -sf /usr/bin/python3.11 /usr/bin/python3
31
+
32
+ # Set working directory
33
+ WORKDIR /app
34
+
35
+ # Copy requirements first for better caching
36
+ COPY requirements.txt .
37
+
38
+ # Install Python dependencies
39
+ RUN python3.11 -m pip install --no-cache-dir -r requirements.txt
40
+
41
+ # Install the MCP Playwright package globally
42
+ RUN npm install -g @playwright/mcp@latest
43
+
44
+ # Create a non-root user (let system assign UID automatically)
45
+ RUN useradd -m appuser
46
+
47
+ # Copy application code and set ownership
48
+ COPY . .
49
+ RUN chown -R appuser:appuser /app
50
+
51
+ # Make sure the non-root user can access Playwright browsers
52
+ RUN chmod -R 755 /ms-playwright
53
+
54
+ # Switch to non-root user
55
+ USER appuser
56
+
57
+ # Set environment variables to use the pre-installed browsers
58
+ ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
59
+ ENV PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1
60
+
61
+ # Expose Gradio port
62
+ EXPOSE 7860
63
+
64
+ # Set environment variables
65
+ ENV GRADIO_SERVER_NAME=0.0.0.0
66
+ ENV GRADIO_SERVER_PORT=7860
67
+
68
+ # Run the application
69
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -1,10 +1,12 @@
1
  ---
2
  title: Web Scrapper
3
- emoji: πŸ†
4
- colorFrom: purple
5
- colorTo: red
6
  sdk: docker
7
  pinned: false
 
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Web Scrapper
3
+ emoji: πŸ“š
4
+ colorFrom: gray
5
+ colorTo: green
6
  sdk: docker
7
  pinned: false
8
+ license: apache-2.0
9
+ short_description: Web scrapper built with the help fo claude
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,673 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import asyncio
3
+ import os
4
+ from typing import List, Tuple, Optional, Dict, Any
5
+ from datetime import datetime
6
+ import logging
7
+ import signal
8
+ import sys
9
+ import json
10
+
11
+ # Configure logging
12
+ logging.basicConfig(level=logging.INFO)
13
+ logger = logging.getLogger(__name__)
14
+
15
+ try:
16
+ from mcp_use import MCPClient
17
+ from langchain_mcp_adapters.client import MultiServerMCPClient
18
+ from langchain_community.tools.sleep.tool import SleepTool
19
+ from langchain_mcp_adapters.tools import load_mcp_tools
20
+ from langchain.agents import AgentExecutor, create_tool_calling_agent
21
+ from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
22
+ from langchain_mistralai import ChatMistralAI
23
+ except ImportError as e:
24
+ logger.error(f"Import error: {e}")
25
+ raise
26
+
27
+ class ConversationManager:
28
+ """Manages conversation history with token optimization"""
29
+
30
+ def __init__(self, max_history_pairs: int = 3, max_context_chars: int = 2000):
31
+ self.max_history_pairs = max_history_pairs
32
+ self.max_context_chars = max_context_chars
33
+ self.session_context = {} # Browser state context
34
+
35
+ def update_session_context(self, action: str, result: str):
36
+ """Update browser session context (current page, last actions, etc.)"""
37
+ self.session_context.update({
38
+ 'last_action': action,
39
+ 'last_result': result[:500], # Truncate long results
40
+ 'timestamp': datetime.now().isoformat()
41
+ })
42
+
43
+ def get_optimized_history(self, full_history: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
44
+ """Get optimized history with recent messages + session context"""
45
+
46
+ # Take only the last N conversation pairs
47
+ recent_history = full_history[-self.max_history_pairs:] if full_history else []
48
+
49
+ # Add session context as first "message" if we have browser state
50
+ if self.session_context:
51
+ context_msg = f"[SESSION_CONTEXT] Browser session active. Last action: {self.session_context.get('last_action', 'none')}"
52
+ recent_history.insert(0, ("system", context_msg))
53
+
54
+ return recent_history
55
+
56
+ def get_context_summary(self) -> str:
57
+ """Get a summary of current browser session state"""
58
+ if not self.session_context:
59
+ return "Browser session not active."
60
+
61
+ return f"Browser session active. Last action: {self.session_context.get('last_action', 'none')} at {self.session_context.get('timestamp', 'unknown')}"
62
+
63
+ class BrowserAgent:
64
+ def __init__(self, api_key: str):
65
+ self.api_key = api_key
66
+ self.client = None
67
+ self.session = None
68
+ self.session_context = None
69
+ self.agent_executor = None
70
+ self.model = None
71
+ self.initialized = False
72
+ self.available_tools = {}
73
+ self.system_prompt = ""
74
+
75
+ # Add conversation manager for token optimization
76
+ self.conversation_manager = ConversationManager(
77
+ max_history_pairs=3, # Only keep last 3 exchanges
78
+ max_context_chars=2000 # Limit context size
79
+ )
80
+
81
+ async def generate_tools_prompt(self):
82
+ """Generate a detailed prompt section about available tools"""
83
+ try:
84
+ tools_prompt = "\n## πŸ› οΈ AVAILABLE TOOLS\n"
85
+ tools_prompt += "You have access to the following browser automation tools via MCP:\n\n"
86
+
87
+ for tool_name, tool_info in self.available_tools.items():
88
+ tools_prompt += f"### {tool_name}\n"
89
+
90
+ # Add description from StructuredTool object
91
+ description = getattr(tool_info, 'description', 'No description available')
92
+ tools_prompt += f"**Description**: {description}\n"
93
+
94
+ # Add parameters from args_schema if available
95
+ if hasattr(tool_info, 'args_schema') and tool_info.args_schema:
96
+ try:
97
+ schema = tool_info.args_schema.model_json_schema()
98
+ if 'properties' in schema:
99
+ tools_prompt += "**Parameters**:\n"
100
+ for param_name, param_info in schema['properties'].items():
101
+ param_type = param_info.get('type', 'unknown')
102
+ param_desc = param_info.get('description', 'No description')
103
+ required = param_name in schema.get('required', [])
104
+ required_mark = " (required)" if required else " (optional)"
105
+ tools_prompt += f"- `{param_name}` ({param_type}){required_mark}: {param_desc}\n"
106
+ except Exception as schema_error:
107
+ logger.debug(f"Could not parse schema for {tool_name}: {schema_error}")
108
+ tools_prompt += "**Usage**: Call this tool when you need to perform this browser action\n"
109
+ else:
110
+ tools_prompt += "**Usage**: Call this tool when you need to perform this browser action\n"
111
+
112
+ tools_prompt += "\n"
113
+
114
+ tools_prompt += """
115
+ 🎯 Multi‑Step Workflow
116
+
117
+ Navigate & Snapshot
118
+
119
+ Load the target page
120
+
121
+ Capture a snapshot
122
+
123
+ Assess if further steps are neededβ€”if so, proceed to the next action
124
+
125
+ Perform Action & Validate
126
+
127
+ if needed closes add or popups
128
+
129
+ Capture a snapshot
130
+
131
+ Verify results before moving on
132
+
133
+ Keep Browser Open
134
+
135
+ Never close the session unless explicitly instructed
136
+
137
+ Avoid Redundancy
138
+
139
+ Don’t repeat actions (e.g., clicking) when data is already collected
140
+
141
+ ## 🚨 SESSION PERSISTENCE RULES
142
+ - Browser stays open for the entire conversation
143
+ - Each action builds on previous state
144
+ - Context is maintained between requests
145
+ """
146
+ return tools_prompt
147
+ except Exception as e:
148
+ logger.error(f"Failed to generate tools prompt: {e}")
149
+ return "\n## πŸ› οΈ TOOLS\nBrowser automation tools available but not detailed.\n"
150
+
151
+ async def get_system_prompt_with_tools(self):
152
+ base = """🌐 Browser Agent β€” Persistent Session & Optimized Memory
153
+ You are an intelligent browser automation agent (Playwright via MCP) tasked with keeping a lightweight, ongoing session:
154
+
155
+ 🎯 Mission
156
+
157
+ Navigate pages, extract and analyze data without closing the browser
158
+
159
+ Handle pop‑ups and capture snapshots to validate each step
160
+
161
+ πŸ”„ Session Management
162
+
163
+ Browser remains open across user requests
164
+
165
+ Only recent chat history is provided to save tokens
166
+
167
+ Session context (current page, recent actions) is maintained separately
168
+
169
+ ⚑ Response Structure
170
+ For each action:
171
+
172
+ State β†’ tool call
173
+
174
+ Snapshot β†’ confirmation
175
+
176
+ Next plan (if needed)
177
+
178
+ πŸ’‘ Best Practices
179
+
180
+ Use text selectors and wait for content
181
+
182
+ Pause 2β€―s between tool calls
183
+
184
+ Be concise and focused on the current task it s important as soon as you have the information you came for return it
185
+
186
+ If earlier context is needed, ask the user to clarify.
187
+
188
+ """
189
+ tools_section = await self.generate_tools_prompt()
190
+ return base + tools_section
191
+
192
+ async def initialize(self):
193
+ """Initialize MCP client, model, session and agent"""
194
+ try:
195
+ logger.info("πŸš€ Initializing Browser Agent...")
196
+
197
+ # LLM
198
+ mistral_key = os.getenv("mistralkey")
199
+ if not mistral_key:
200
+ raise ValueError("Mistral API key is required")
201
+
202
+ self.model = ChatMistralAI(
203
+ model="mistral-small-latest",
204
+ api_key=mistral_key,
205
+
206
+ )
207
+ logger.info("βœ… Mistral LLM initialized with optimized settings")
208
+
209
+ # MCP client
210
+ self.client = MultiServerMCPClient({
211
+ "browser": {
212
+ "command": "npx",
213
+ "args": ["@playwright/mcp@latest", "--browser", "chromium","--user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"],
214
+ "transport": "stdio"
215
+ }
216
+ })
217
+ logger.info("βœ… MCP client created")
218
+
219
+ # Start persistent session
220
+ self.session_context = self.client.session("browser")
221
+ self.session = await self.session_context.__aenter__()
222
+ logger.info("βœ… MCP session opened")
223
+
224
+ # Load tools
225
+ tools = await load_mcp_tools(self.session)
226
+ tools.append(SleepTool(description="Wait 4 seconds between two calls"))
227
+ logger.info(f"πŸ“₯ Loaded {len(tools)} tools")
228
+ self.available_tools = {t.name: t for t in tools}
229
+
230
+ # Install browser if needed
231
+ install_tool = self.available_tools.get("browser_install")
232
+ if install_tool:
233
+ try:
234
+ result = await install_tool.arun({})
235
+ logger.info(f"πŸ“₯ Browser install: {result}")
236
+ except Exception as e:
237
+ logger.warning(f"⚠️ Browser install failed: {e}, continuing.")
238
+
239
+ # System prompt
240
+ self.system_prompt = await self.get_system_prompt_with_tools()
241
+
242
+ # Create agent
243
+ prompt = ChatPromptTemplate.from_messages([
244
+ ("system", self.system_prompt),
245
+ MessagesPlaceholder(variable_name="chat_history"),
246
+ ("human", "{input}"),
247
+ MessagesPlaceholder(variable_name="agent_scratchpad"),
248
+ ])
249
+ agent = create_tool_calling_agent(
250
+ llm=self.model,
251
+ tools=tools,
252
+ prompt=prompt
253
+ )
254
+ self.agent_executor = AgentExecutor(
255
+ agent=agent,
256
+ tools=tools,
257
+ verbose=True,
258
+ max_iterations=15, # Reduced from 30
259
+ early_stopping_method="generate",
260
+ handle_parsing_errors=True,
261
+ return_intermediate_steps=True,
262
+ max_execution_time=180 # Reduced from 300
263
+ )
264
+
265
+ self.initialized = True
266
+ logger.info("βœ… Agent initialized with persistent session and optimized memory")
267
+ return True
268
+
269
+ except Exception as e:
270
+ logger.error(f"❌ Initialization failed: {e}")
271
+ await self.cleanup()
272
+ raise
273
+
274
+ async def process_query(self, query: str, chat_history: List[Tuple[str, str]]) -> str:
275
+ if not self.initialized:
276
+ return "❌ Agent not initialized. Please restart the application."
277
+
278
+ try:
279
+ # βœ… KEY OPTIMIZATION: Use only recent history instead of full history
280
+ optimized_history = self.conversation_manager.get_optimized_history(chat_history)
281
+
282
+ # Convert to message format
283
+ history_messages = []
284
+ for human, ai in optimized_history:
285
+ if human: history_messages.append(("human", human))
286
+ if ai: history_messages.append(("ai", ai))
287
+
288
+ # Add session context
289
+ context_summary = self.conversation_manager.get_context_summary()
290
+ enhanced_query = f"{query}\n\n[SESSION_INFO]: {context_summary}"
291
+
292
+ # Log token savings
293
+ original_pairs = len(chat_history)
294
+ optimized_pairs = len(optimized_history)
295
+ logger.info(f"πŸ’° Token optimization: {original_pairs} β†’ {optimized_pairs} history pairs")
296
+
297
+ # Execute with optimized history
298
+ resp = await self.agent_executor.ainvoke({
299
+ "input": enhanced_query,
300
+ "chat_history": history_messages
301
+ })
302
+
303
+ # Update session context with this interaction
304
+ self.conversation_manager.update_session_context(
305
+ action=query,
306
+ result=resp["output"]
307
+ )
308
+
309
+ return resp["output"]
310
+
311
+ except Exception as e:
312
+ logger.error(f"Error processing query: {e}")
313
+ return f"❌ Error: {e}\nπŸ’‘ Ask for a screenshot to diagnose."
314
+
315
+ async def cleanup(self):
316
+ """Cleanup resources properly"""
317
+ try:
318
+ if self.session_context:
319
+ await self.session_context.__aexit__(None, None, None)
320
+ logger.info("βœ… MCP session closed")
321
+ self.session_context = None
322
+ self.session = None
323
+
324
+ if self.client:
325
+ await self.client.close()
326
+ logger.info("βœ… MCP client closed")
327
+ self.client = None
328
+
329
+ self.initialized = False
330
+
331
+ except Exception as e:
332
+ logger.error(f"Cleanup error: {e}")
333
+
334
+ def get_token_usage_stats(self, full_history: List[Tuple[str, str]]) -> Dict[str, Any]:
335
+ """Get statistics about token usage optimization"""
336
+ original_pairs = len(full_history)
337
+ optimized_pairs = len(self.conversation_manager.get_optimized_history(full_history))
338
+
339
+ # Rough token estimation (1 token β‰ˆ 4 characters)
340
+ def estimate_tokens(text: str) -> int:
341
+ return len(text) // 4
342
+
343
+ original_tokens = sum(estimate_tokens(msg[0] + msg[1]) for msg in full_history)
344
+ optimized_tokens = sum(estimate_tokens(msg[0] + msg[1]) for msg in self.conversation_manager.get_optimized_history(full_history))
345
+
346
+ return {
347
+ "original_pairs": original_pairs,
348
+ "optimized_pairs": optimized_pairs,
349
+ "pairs_saved": original_pairs - optimized_pairs,
350
+ "estimated_original_tokens": original_tokens,
351
+ "estimated_optimized_tokens": optimized_tokens,
352
+ "estimated_tokens_saved": original_tokens - optimized_tokens,
353
+ "savings_percentage": ((original_tokens - optimized_tokens) / original_tokens * 100) if original_tokens > 0 else 0
354
+ }
355
+
356
+ # Global agent instance
357
+ agent: Optional[BrowserAgent] = None
358
+ event_loop: Optional[asyncio.AbstractEventLoop] = None
359
+
360
+ async def initialize_agent_async(api_key: str) -> str:
361
+ """Initialize the agent asynchronously"""
362
+ global agent
363
+
364
+ if not api_key.strip():
365
+ return "❌ Please provide a Mistral API key"
366
+
367
+ try:
368
+ # Cleanup existing agent
369
+ if agent:
370
+ await agent.cleanup()
371
+
372
+ # Create new agent
373
+ agent = BrowserAgent(api_key)
374
+ await agent.initialize()
375
+
376
+ info = await agent.get_system_prompt_with_tools()
377
+ return f"βœ… Agent Initialized Successfully with Token Optimization!\n\n{info[:1000]}..."
378
+
379
+ except Exception as e:
380
+ logger.error(f"Initialization error: {e}")
381
+ return f"❌ Failed to initialize agent: {e}"
382
+
383
+ async def process_message_async(message: str, history: List[List[str]]) -> Tuple[str, List[List[str]]]:
384
+ """Process message asynchronously with token optimization"""
385
+ global agent
386
+
387
+ if not agent or not agent.initialized:
388
+ error_msg = "❌ Agent not initialized. Please initialize first with your API key."
389
+ history.append([message, error_msg])
390
+ return "", history
391
+
392
+ if not message.strip():
393
+ error_msg = "Please enter a message"
394
+ history.append([message, error_msg])
395
+ return "", history
396
+
397
+ try:
398
+ # Convert history format for the agent
399
+ agent_history = [(msg[0], msg[1]) for msg in history]
400
+
401
+ # Get token usage stats before processing
402
+ stats = agent.get_token_usage_stats(agent_history)
403
+
404
+ # Process the query with optimized history
405
+ response = await agent.process_query(message, agent_history)
406
+
407
+ # Add token savings info to response if significant savings
408
+ if stats["savings_percentage"] > 50:
409
+ response += f"\n\nπŸ’° Token savings: {stats['savings_percentage']:.1f}% ({stats['estimated_tokens_saved']} tokens saved)"
410
+
411
+ # Add to history
412
+ history.append([message, response])
413
+
414
+ return "", history
415
+
416
+ except Exception as e:
417
+ logger.error(f"Message processing error: {e}")
418
+ error_msg = f"❌ Error: {e}\nπŸ’‘ Try asking for a screenshot to diagnose."
419
+ history.append([message, error_msg])
420
+ return "", history
421
+
422
+ def run_in_event_loop(coro):
423
+ """Run coroutine in the event loop"""
424
+ global event_loop
425
+ if event_loop and not event_loop.is_closed():
426
+ return asyncio.run_coroutine_threadsafe(coro, event_loop).result()
427
+ else:
428
+ return asyncio.run(coro)
429
+
430
+ # Sync wrappers for Gradio
431
+ def initialize_agent_sync(api_key: str) -> str:
432
+ """Sync wrapper for agent initialization"""
433
+ return run_in_event_loop(initialize_agent_async(api_key))
434
+
435
+ def process_message_sync(message: str, history: List[List[str]]) -> Tuple[str, List[List[str]]]:
436
+ """Sync wrapper for message processing"""
437
+ return run_in_event_loop(process_message_async(message, history))
438
+
439
+ def get_token_stats_sync(history: List[List[str]]) -> str:
440
+ """Get token usage statistics"""
441
+ global agent
442
+ if not agent or not agent.initialized:
443
+ return "Agent not initialized"
444
+
445
+ agent_history = [(msg[0], msg[1]) for msg in history]
446
+ stats = agent.get_token_usage_stats(agent_history)
447
+
448
+ return f"""πŸ“Š Token Usage Statistics:
449
+ β€’ Original conversation pairs: {stats['original_pairs']}
450
+ β€’ Optimized conversation pairs: {stats['optimized_pairs']}
451
+ β€’ Pairs saved: {stats['pairs_saved']}
452
+ β€’ Estimated original tokens: {stats['estimated_original_tokens']:,}
453
+ β€’ Estimated optimized tokens: {stats['estimated_optimized_tokens']:,}
454
+ β€’ Estimated tokens saved: {stats['estimated_tokens_saved']:,}
455
+ β€’ Savings percentage: {stats['savings_percentage']:.1f}%"""
456
+
457
+ def create_interface():
458
+ """Create the Gradio interface"""
459
+
460
+ with gr.Blocks(
461
+ title="MCP Browser Agent - Token Optimized",
462
+ theme=gr.themes.Soft(),
463
+ css="""
464
+ .container { max-width: 1200px; margin: auto; }
465
+ .header { text-align: center; margin-bottom: 2rem; }
466
+ .status-box { padding: 1rem; border-radius: 8px; margin: 1rem 0; }
467
+ .token-stats { background: #f0f8ff; padding: 1rem; border-radius: 8px; }
468
+ """
469
+ ) as interface:
470
+
471
+ gr.HTML("""
472
+ <div class="header">
473
+ <h1>🌐 MCP Browser Agent - Token Optimized</h1>
474
+ <p>AI-powered web browsing with persistent sessions and optimized token usage</p>
475
+ </div>
476
+ """)
477
+
478
+ with gr.Row():
479
+ with gr.Column(scale=1):
480
+ gr.Markdown("### πŸ”§ Configuration")
481
+ api_key_input = gr.Textbox(
482
+ label="Mistral API Key",
483
+ placeholder="Enter your Mistral API key...",
484
+ type="password",
485
+ lines=1
486
+ )
487
+
488
+ init_button = gr.Button("Initialize Agent", variant="primary")
489
+ status_output = gr.Textbox(
490
+ label="Status & Available Tools",
491
+ interactive=False,
492
+ lines=6
493
+ )
494
+
495
+ gr.Markdown("### πŸ’° Token Optimization")
496
+ token_stats_button = gr.Button("Show Token Stats", variant="secondary")
497
+ token_stats_output = gr.Textbox(
498
+ label="Token Usage Statistics",
499
+ interactive=False,
500
+ lines=8
501
+ )
502
+
503
+ gr.Markdown("""
504
+ ### πŸ“ Optimized Usage Tips
505
+ **Token Savings Features:**
506
+ - Only last 3 conversation pairs sent to API
507
+ - Session context maintained separately
508
+ - Reduced max tokens per response
509
+ - Smart context summarization
510
+
511
+ **Best Practices:**
512
+ - Be specific in your requests
513
+ - Use "take screenshot" to check current state
514
+ - Ask for "browser status" if you need context
515
+ - Long conversations automatically optimized
516
+ """)
517
+
518
+ with gr.Column(scale=2):
519
+ gr.Markdown("### πŸ’¬ Chat with Browser Agent")
520
+
521
+ chatbot = gr.Chatbot(
522
+ label="Conversation",
523
+ height=500,
524
+ show_copy_button=True
525
+ )
526
+
527
+ with gr.Row():
528
+ message_input = gr.Textbox(
529
+ label="Message",
530
+ placeholder="Enter your browsing request...",
531
+ lines=2,
532
+ scale=4
533
+ )
534
+ send_button = gr.Button("Send", variant="primary", scale=1)
535
+
536
+ with gr.Row():
537
+ clear_button = gr.Button("Clear Chat", variant="secondary")
538
+ screenshot_button = gr.Button("Quick Screenshot", variant="secondary")
539
+
540
+ # Event handlers
541
+ init_button.click(
542
+ fn=initialize_agent_sync,
543
+ inputs=[api_key_input],
544
+ outputs=[status_output]
545
+ )
546
+
547
+ send_button.click(
548
+ fn=process_message_sync,
549
+ inputs=[message_input, chatbot],
550
+ outputs=[message_input, chatbot]
551
+ )
552
+
553
+ message_input.submit(
554
+ fn=process_message_sync,
555
+ inputs=[message_input, chatbot],
556
+ outputs=[message_input, chatbot]
557
+ )
558
+
559
+ clear_button.click(
560
+ fn=lambda: [],
561
+ outputs=[chatbot]
562
+ )
563
+
564
+ screenshot_button.click(
565
+ fn=lambda history: process_message_sync("Take a screenshot of the current page", history),
566
+ inputs=[chatbot],
567
+ outputs=[message_input, chatbot]
568
+ )
569
+
570
+ token_stats_button.click(
571
+ fn=get_token_stats_sync,
572
+ inputs=[chatbot],
573
+ outputs=[token_stats_output]
574
+ )
575
+
576
+ # Add helpful information
577
+ with gr.Accordion("ℹ️ Token Optimization Guide", open=False):
578
+ gr.Markdown("""
579
+ ## πŸ’° How Token Optimization Works
580
+
581
+ **The Problem with Original Code:**
582
+ - Every API call sent complete conversation history
583
+ - Token usage grew exponentially with conversation length
584
+ - Costs could explode for long sessions
585
+
586
+ **Our Optimization Solutions:**
587
+
588
+ 1. **Limited History Window**: Only last 3 conversation pairs sent to API
589
+ 2. **Session Context**: Browser state maintained separately from chat history
590
+ 3. **Smart Summarization**: Key session info added to each request
591
+ 4. **Reduced Limits**: Lower max_tokens and max_iterations
592
+ 5. **Token Tracking**: Real-time savings statistics
593
+
594
+ **Token Savings Example:**
595
+ ```
596
+ Original: 10 messages = 5,000 tokens per API call
597
+ Optimized: 10 messages = 500 tokens per API call
598
+ Savings: 90% reduction in token usage!
599
+ ```
600
+
601
+ **What This Means:**
602
+ - βœ… Persistent browser sessions still work
603
+ - βœ… 90%+ reduction in API costs
604
+ - βœ… Faster response times
605
+ - βœ… Better performance for long conversations
606
+ - ⚠️ Agent has limited memory of old messages
607
+
608
+ **If Agent Needs Earlier Context:**
609
+ - Use "browser status" to check current state
610
+ - Take screenshots to show current page
611
+ - Re-explain context if needed
612
+ - Clear chat periodically for fresh start
613
+ """)
614
+
615
+ return interface
616
+
617
+ async def cleanup_agent():
618
+ """Cleanup agent resources"""
619
+ global agent
620
+ if agent:
621
+ await agent.cleanup()
622
+ logger.info("🧹 Agent cleaned up")
623
+
624
+ def signal_handler(signum, frame):
625
+ """Handle shutdown signals"""
626
+ logger.info(f"πŸ“‘ Received signal {signum}, cleaning up...")
627
+ global event_loop
628
+ if event_loop and not event_loop.is_closed():
629
+ event_loop.create_task(cleanup_agent())
630
+ sys.exit(0)
631
+
632
+ async def main():
633
+ """Main async function to run everything"""
634
+ global event_loop
635
+
636
+ # Set up signal handlers
637
+ signal.signal(signal.SIGINT, signal_handler)
638
+ signal.signal(signal.SIGTERM, signal_handler)
639
+
640
+ # Get the current event loop
641
+ event_loop = asyncio.get_event_loop()
642
+
643
+ try:
644
+ logger.info("πŸš€ Starting MCP Browser Agent Application with Token Optimization...")
645
+
646
+ # Create and launch interface
647
+ interface = create_interface()
648
+
649
+ # Launch interface (this will block)
650
+ await asyncio.to_thread(
651
+ interface.launch,
652
+ server_name="0.0.0.0",
653
+ server_port=7860,
654
+ share=False,
655
+ debug=False,
656
+ show_error=True,
657
+ quiet=False
658
+ )
659
+
660
+ except Exception as e:
661
+ logger.error(f"Application error: {e}")
662
+ finally:
663
+ await cleanup_agent()
664
+
665
+ if __name__ == "__main__":
666
+ try:
667
+ asyncio.run(main())
668
+ except KeyboardInterrupt:
669
+ logger.info("πŸ›‘ Application stopped by user")
670
+ except Exception as e:
671
+ logger.error(f"Fatal error: {e}")
672
+ finally:
673
+ logger.info("πŸ‘‹ Application shutdown complete")
package.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "Web_scrapper",
3
+ "version": "1.0.0",
4
+ "devDependencies": {
5
+ "@playwright/mcp": "latest"
6
+ }
7
+ }
requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core dependencies
2
+ gradio>=4.0.0
3
+ langchain>=0.3.0
4
+ langchain-mistralai>=0.1.0
5
+ langchain-mcp-adapters>=0.1.0
6
+ langchain-community
7
+ mcp-use>=0.1.0
8
+
9
+ # MCP and browser automation
10
+ playwright>=1.40.0
11
+
12
+ # Additional utilities
13
+ python-dotenv>=1.0.0
14
+ pydantic>=2.0.0
15
+ aiohttp>=3.8.0
16
+ httpx>=0.24.0
17
+
18
+ # Optional: For better logging and debugging
19
+ loguru>=0.7.0
20
+ rich>=13.0.0