iruno commited on
Commit
498ffec
·
verified ·
1 Parent(s): 5a0e61c

Upload 245 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. BrowserGym/.gitignore +154 -0
  2. BrowserGym/.pre-commit-config.yaml +44 -0
  3. BrowserGym/.readthedocs.yaml +32 -0
  4. BrowserGym/LICENSE +13 -0
  5. BrowserGym/Makefile +17 -0
  6. BrowserGym/README.md +254 -0
  7. BrowserGym/browsergym/assistantbench/README.md +21 -0
  8. BrowserGym/browsergym/assistantbench/pyproject.toml +35 -0
  9. BrowserGym/browsergym/assistantbench/requirements.txt +4 -0
  10. BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/__init__.py +54 -0
  11. BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_dicts.py +68 -0
  12. BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_factory.py +28 -0
  13. BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_numbers.py +34 -0
  14. BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_strings.py +174 -0
  15. BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/utils.py +25 -0
  16. BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluator.py +132 -0
  17. BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/task.py +142 -0
  18. BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/utils.py +73 -0
  19. BrowserGym/browsergym/browsergym.egg-info/PKG-INFO +22 -0
  20. BrowserGym/browsergym/browsergym.egg-info/SOURCES.txt +6 -0
  21. BrowserGym/browsergym/browsergym.egg-info/dependency_links.txt +1 -0
  22. BrowserGym/browsergym/browsergym.egg-info/requires.txt +8 -0
  23. BrowserGym/browsergym/browsergym.egg-info/top_level.txt +1 -0
  24. BrowserGym/browsergym/core/README.md +10 -0
  25. BrowserGym/browsergym/core/pyproject.toml +42 -0
  26. BrowserGym/browsergym/core/requirements.txt +8 -0
  27. BrowserGym/browsergym/core/src/browsergym/core/__init__.py +27 -0
  28. BrowserGym/browsergym/core/src/browsergym/core/__pycache__/__init__.cpython-311.pyc +0 -0
  29. BrowserGym/browsergym/core/src/browsergym/core/__pycache__/chat.cpython-311.pyc +0 -0
  30. BrowserGym/browsergym/core/src/browsergym/core/__pycache__/constants.cpython-311.pyc +0 -0
  31. BrowserGym/browsergym/core/src/browsergym/core/__pycache__/env.cpython-311.pyc +0 -0
  32. BrowserGym/browsergym/core/src/browsergym/core/__pycache__/observation.cpython-311.pyc +0 -0
  33. BrowserGym/browsergym/core/src/browsergym/core/__pycache__/registration.cpython-311.pyc +0 -0
  34. BrowserGym/browsergym/core/src/browsergym/core/__pycache__/spaces.cpython-311.pyc +0 -0
  35. BrowserGym/browsergym/core/src/browsergym/core/__pycache__/task.cpython-311.pyc +0 -0
  36. BrowserGym/browsergym/core/src/browsergym/core/action/__init__.py +11 -0
  37. BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/__init__.cpython-311.pyc +0 -0
  38. BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/base.cpython-311.pyc +0 -0
  39. BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/functions.cpython-311.pyc +0 -0
  40. BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/highlevel.cpython-311.pyc +0 -0
  41. BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/parsers.cpython-311.pyc +0 -0
  42. BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/utils.cpython-311.pyc +0 -0
  43. BrowserGym/browsergym/core/src/browsergym/core/action/base.py +63 -0
  44. BrowserGym/browsergym/core/src/browsergym/core/action/functions.py +624 -0
  45. BrowserGym/browsergym/core/src/browsergym/core/action/highlevel.py +522 -0
  46. BrowserGym/browsergym/core/src/browsergym/core/action/parsers.py +92 -0
  47. BrowserGym/browsergym/core/src/browsergym/core/action/python.py +112 -0
  48. BrowserGym/browsergym/core/src/browsergym/core/action/utils.py +288 -0
  49. BrowserGym/browsergym/core/src/browsergym/core/chat.py +95 -0
  50. BrowserGym/browsergym/core/src/browsergym/core/chat_files/chatbox.html +243 -0
BrowserGym/.gitignore ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .DS_store
2
+ .idea/
3
+ docs/src/generated/
4
+
5
+ # Byte-compiled / optimized / DLL files
6
+ __pycache__/
7
+ *.py[cod]
8
+ *$py.class
9
+
10
+ # C extensions
11
+ *.so
12
+
13
+ # Distribution / packaging
14
+ .Python
15
+ build/
16
+ develop-eggs/
17
+ dist/
18
+ downloads/
19
+ eggs/
20
+ .eggs/
21
+ lib/
22
+ lib64/
23
+ parts/
24
+ sdist/
25
+ var/
26
+ wheels/
27
+ pip-wheel-metadata/
28
+ share/python-wheels/
29
+ *.egg-info/
30
+ .installed.cfg
31
+ *.egg
32
+ MANIFEST
33
+
34
+ # PyInstaller
35
+ # Usually these files are written by a python script from a template
36
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
37
+ *.manifest
38
+ *.spec
39
+
40
+ # Installer logs
41
+ pip-log.txt
42
+ pip-delete-this-directory.txt
43
+
44
+ # Unit test / coverage reports
45
+ htmlcov/
46
+ .tox/
47
+ .nox/
48
+ .coverage
49
+ .coverage.*
50
+ .cache
51
+ nosetests.xml
52
+ coverage.xml
53
+ *.cover
54
+ *.py,cover
55
+ .hypothesis/
56
+ .pytest_cache/
57
+
58
+ # Translations
59
+ *.mo
60
+ *.pot
61
+
62
+ # Django stuff:
63
+ *.log
64
+ local_settings.py
65
+ db.sqlite3
66
+ db.sqlite3-journal
67
+
68
+ # Flask stuff:
69
+ instance/
70
+ .webassets-cache
71
+
72
+ # Scrapy stuff:
73
+ .scrapy
74
+
75
+ # Sphinx documentation
76
+ docs/_build/
77
+
78
+ # PyBuilder
79
+ target/
80
+
81
+ # Jupyter Notebook
82
+ .ipynb_checkpoints
83
+
84
+ # IPython
85
+ profile_default/
86
+ ipython_config.py
87
+
88
+ # pyenv
89
+ .python-version
90
+
91
+ # pipenv
92
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
94
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
95
+ # install all needed dependencies.
96
+ #Pipfile.lock
97
+
98
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
99
+ __pypackages__/
100
+
101
+ # Celery stuff
102
+ celerybeat-schedule
103
+ celerybeat.pid
104
+
105
+ # SageMath parsed files
106
+ *.sage.py
107
+
108
+ # Environments
109
+ .env
110
+ .venv
111
+ env/
112
+ venv/
113
+ ENV/
114
+ env.bak/
115
+ venv.bak/
116
+
117
+ # Spyder project settings
118
+ .spyderproject
119
+ .spyproject
120
+
121
+ # Rope project settings
122
+ .ropeproject
123
+
124
+ # mkdocs documentation
125
+ /site
126
+
127
+ # mypy
128
+ .mypy_cache/
129
+ .dmypy.json
130
+ dmypy.json
131
+
132
+ # Pyre type checker
133
+ .pyre/
134
+
135
+ # error logs
136
+ error_logs.txt
137
+
138
+ # tests
139
+ tests/results
140
+ tmp.py
141
+ .vscode/**
142
+
143
+ # demo and results
144
+ results/
145
+
146
+ .vscode/launch.json
147
+
148
+ # assistantbench
149
+ tests/assistantbench/assistantbench-predictions-test.jsonl
150
+
151
+ # weblinx
152
+ bg_wl_data/
153
+
154
+ uv.lock
BrowserGym/.pre-commit-config.yaml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fail_fast: false
2
+
3
+ default_language_version:
4
+ python: python3
5
+
6
+ repos:
7
+ - repo: https://github.com/pre-commit/pre-commit-hooks
8
+ rev: v4.2.0
9
+ hooks:
10
+ - id: trailing-whitespace
11
+ exclude: ^(.*)\.md$
12
+ - id: end-of-file-fixer
13
+ - id: check-yaml
14
+ exclude: ^(.circleci/recipe|recipe) # conda build recipes are templated
15
+ - id: check-added-large-files
16
+ - repo: https://github.com/pocc/pre-commit-hooks
17
+ rev: v1.1.1
18
+ hooks:
19
+ - id: clang-format
20
+ args: [--style=file, -i]
21
+ - id: clang-tidy
22
+ args: [--fix, --fix-errors]
23
+ - repo: https://github.com/psf/black
24
+ rev: 24.2.0
25
+ hooks:
26
+ - id: black
27
+ args: [--config=./pyproject.toml]
28
+ - repo: https://github.com/asottile/blacken-docs
29
+ rev: v1.12.1
30
+ hooks:
31
+ - id: blacken-docs
32
+ args: [ '--line-length', '100' ]
33
+ additional_dependencies: [black]
34
+ - repo: https://github.com/Lucas-C/pre-commit-hooks
35
+ rev: v1.5.5
36
+ hooks:
37
+ - id: forbid-crlf
38
+ - id: remove-crlf
39
+ # Black does not clear tabs in docstrings
40
+ - id: forbid-tabs
41
+ files: '.*\.py$'
42
+ - id: remove-tabs
43
+ files: '.*\.py$'
44
+ args: [ '--whitespaces-count', '4' ]
BrowserGym/.readthedocs.yaml ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # .readthedocs.yaml
2
+ # Read the Docs configuration file
3
+ # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4
+
5
+ # Required
6
+ version: 2
7
+
8
+ # Set the OS, Python version and other tools you might need
9
+ build:
10
+ os: ubuntu-22.04
11
+ tools:
12
+ python: "3.12"
13
+ # You can also specify other tool versions:
14
+ # nodejs: "19"
15
+ # rust: "1.64"
16
+ # golang: "1.19"
17
+
18
+ # Build documentation in the "docs/" directory with Sphinx
19
+ sphinx:
20
+ configuration: docs/src/conf.py
21
+
22
+ # Optionally build your docs in additional formats such as PDF and ePub
23
+ # formats:
24
+ # - pdf
25
+ # - epub
26
+
27
+ # Optional but recommended, declare the Python requirements required
28
+ # to build your documentation
29
+ # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
30
+ python:
31
+ install:
32
+ - requirements: docs/requirements.txt
BrowserGym/LICENSE ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright 2024 ServiceNow
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
BrowserGym/Makefile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ install:
2
+ @echo "--- 🚀 Installing project dependencies ---"
3
+ pip install -e ./browsergym/core -e ./browsergym/miniwob -e ./browsergym/webarena -e ./browsergym/visualwebarena/ -e ./browsergym/experiments -e ./browsergym/assistantbench -e ./browsergym/
4
+ playwright install chromium
5
+
6
+ install-demo:
7
+ @echo "--- 🚀 Installing demo dependencies ---"
8
+ pip install -r demo_agent/requirements.txt
9
+ playwright install chromium
10
+
11
+ demo:
12
+ @echo "--- 🚀 Running demo agent ---"
13
+ (set -x && cd demo_agent && python run_demo.py)
14
+
15
+ test-core:
16
+ @echo "--- 🧪 Running tests ---"
17
+ pytest -n auto ./tests/core
BrowserGym/README.md ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div align="center">
2
+
3
+ ![BrowserGym banner](https://github.com/user-attachments/assets/4853f210-43ac-4107-a0d2-95c9c614dbe7)
4
+
5
+ 🛠️ [Setup](#%EF%B8%8F-setup) -
6
+ 🏋 [Usage](#-usage) -
7
+ 💻 [Demo](#-demo) -
8
+ 🌐 [Ecosystem](#-ecosystem) -
9
+ 🚀 [AgentLab](https://github.com/ServiceNow/AgentLab) -
10
+ 🌟 [Contributors](#-contributors) -
11
+ 📄 [Paper](https://arxiv.org/abs/2412.05467) -
12
+ 📝 [Citation](#-citing-this-work)
13
+
14
+ [![pypi](https://badge.fury.io/py/browsergym.svg)](https://pypi.org/project/browsergym/)
15
+ [![PyPI - License](https://img.shields.io/pypi/l/browsergym?style=flat-square)](http://www.apache.org/licenses/LICENSE-2.0)
16
+ [![PyPI - Downloads](https://img.shields.io/pypi/dm/browsergym-core?style=flat-square)](https://pypistats.org/packages/browsergym-core)
17
+ [![GitHub star chart](https://img.shields.io/github/stars/ServiceNow/BrowserGym?style=flat-square)](https://star-history.com/#ServiceNow/BrowserGym)
18
+ [![Code Format](https://github.com/ServiceNow/BrowserGym/actions/workflows/code_format.yml/badge.svg)](https://github.com/ServiceNow/BrowserGym/actions/workflows/code_format.yml)
19
+ [![Tests](https://github.com/ServiceNow/BrowserGym/actions/workflows/unit_tests.yml/badge.svg)](https://github.com/ServiceNow/BrowserGym/actions/workflows/unit_tests.yml)
20
+
21
+ ```sh
22
+ pip install browsergym
23
+ ```
24
+
25
+ </div>
26
+
27
+ > [!WARNING]
28
+ > BrowserGym is meant to provide an open, easy-to-use and extensible framework to accelerate the field of web agent research.
29
+ > It is not meant to be a consumer product. Use with caution!
30
+
31
+ > [!TIP]
32
+ > 🚀 Check out [AgentLab](https://github.com/ServiceNow/AgentLab)✨ !
33
+ > A seamless framework to implement, test, and evaluate your web agents on all BrowserGym benchmarks.
34
+
35
+ https://github.com/ServiceNow/BrowserGym/assets/26232819/e0bfc788-cc8e-44f1-b8c3-0d1114108b85
36
+
37
+ _Example of a GPT4-V agent executing openended tasks (top row, chat interactive), as well as WebArena and WorkArena tasks (bottom row)._
38
+
39
+ BrowserGym includes the following benchmarks by default:
40
+ - [MiniWoB](https://miniwob.farama.org/)
41
+ - [WebArena](https://webarena.dev/)
42
+ - [VisualWebArena](https://jykoh.com/vwa)
43
+ - [WorkArena](https://github.com/ServiceNow/WorkArena)
44
+ - [AssistantBench](https://github.com/oriyor/assistantbench)
45
+ - [WebLINX](https://github.com/McGill-NLP/weblinx) (static benchmark)
46
+
47
+ Designing new web benchmarks with BrowserGym is easy, and simply requires inheriting from the [`AbstractBrowserTask`](https://github.com/ServiceNow/BrowserGym/blob/main/browsergym/core/src/browsergym/core/task.py#L7C7-L7C26) class.
48
+
49
+ ## 🛠️ Setup
50
+
51
+ To use browsergym, install one of the following packages:
52
+ ```sh
53
+ pip install browsergym # (recommended) everything below
54
+ pip install browsergym-experiments # experiment utilities (agent, loop, benchmarks) + everything below
55
+ pip install browsergym-core # core functionalities only (no benchmark, just the openended task)
56
+ pip install browsergym-miniwob # core + miniwob
57
+ pip install browsergym-webarena # core + webarena
58
+ pip install browsergym-visualwebarena # core + visualwebarena
59
+ pip install browsergym-workarena # core + workarena
60
+ pip install browsergym-assistantbench # core + assistantbench
61
+ pip install weblinx-browsergym # core + weblinx
62
+ ```
63
+
64
+ Then set up Playwright by running:
65
+ ```sh
66
+ playwright install chromium
67
+ ```
68
+
69
+ Finally, each benchmark comes with its own specific setup, which requires following additional steps:
70
+ - for MiniWoB++, see [miniwob/README.md](browsergym/miniwob/README.md)
71
+ - for WebArena, see [webarena/README.md](browsergym/webarena/README.md)
72
+ - for VisualWebArena, see [visualwebarena/README.md](browsergym/visualwebarena/README.md)
73
+ - for WorkArena, see [WorkArena](https://github.com/ServiceNow/WorkArena)
74
+ - for AssistantBench, see [assistantbench/README.md](browsergym/assistantbench/README.md)
75
+
76
+ ### 🏗️ Development setup
77
+
78
+ To install browsergym locally for development, use the following commands:
79
+ ```sh
80
+ git clone git@github.com:ServiceNow/BrowserGym.git
81
+ cd BrowserGym
82
+ make install
83
+ ```
84
+
85
+ Contributions are welcome! 😊
86
+
87
+ ## 🏋 Usage
88
+
89
+ Boilerplate code to run an agent on an interactive, open-ended task:
90
+ ```python
91
+ import gymnasium as gym
92
+ import browsergym.core # register the openended task as a gym environment
93
+
94
+ # start an openended environment
95
+ env = gym.make(
96
+ "browsergym/openended",
97
+ task_kwargs={"start_url": "https://www.google.com/"}, # starting URL
98
+ wait_for_user_message=True, # wait for a user message after each agent message sent to the chat
99
+ )
100
+ # run the environment <> agent loop until termination
101
+ obs, info = env.reset()
102
+ while True:
103
+ action = ... # implement your agent here
104
+ obs, reward, terminated, truncated, info = env.step(action)
105
+ if terminated or truncated:
106
+ break
107
+ # release the environment
108
+ env.close()
109
+ ```
110
+
111
+ MiniWoB
112
+ ```python
113
+ import gymnasium as gym
114
+ import browsergym.miniwob # register miniwob tasks as gym environments
115
+
116
+ # start a miniwob task
117
+ env = gym.make("browsergym/miniwob.choose-list")
118
+ ...
119
+
120
+ # list all the available miniwob tasks
121
+ env_ids = [id for id in gym.envs.registry.keys() if id.startswith("browsergym/miniwob")]
122
+ print("\n".join(env_ids))
123
+ ```
124
+
125
+ WorkArena
126
+ ```python
127
+ import gymnasium as gym
128
+ import browsergym.workarena # register workarena tasks as gym environments
129
+
130
+ # start a workarena task
131
+ env = gym.make("browsergym/workarena.servicenow.order-ipad-pro")
132
+ ...
133
+
134
+ # list all the available workarena tasks
135
+ env_ids = [id for id in gym.envs.registry.keys() if id.startswith("browsergym/workarena")]
136
+ print("\n".join(env_ids))
137
+ ```
138
+
139
+ WebArena
140
+ ```python
141
+ import gymnasium as gym
142
+ import browsergym.webarena # register webarena tasks as gym environments
143
+
144
+ # start a webarena task
145
+ env = gym.make("browsergym/webarena.310")
146
+ ...
147
+
148
+ # list all the available webarena tasks
149
+ env_ids = [id for id in gym.envs.registry.keys() if id.startswith("browsergym/webarena")]
150
+ print("\n".join(env_ids))
151
+ ```
152
+
153
+ VisualWebArena
154
+ ```python
155
+ import gymnasium as gym
156
+ import browsergym.visualwebarena # register visualwebarena tasks as gym environments
157
+
158
+ # start a visualwebarena task
159
+ env = gym.make("browsergym/visualwebarena.721")
160
+ ...
161
+
162
+ # list all the available visualwebarena tasks
163
+ env_ids = [id for id in gym.envs.registry.keys() if id.startswith("browsergym/visualwebarena")]
164
+ print("\n".join(env_ids))
165
+ ```
166
+
167
+ AssistantBench
168
+ ```python
169
+ import gymnasium as gym
170
+ import browsergym.assistantbench # register assistantbench tasks as gym environments
171
+
172
+ # start an assistantbench task
173
+ env = gym.make("browsergym/assistantbench.validation.3")
174
+ ...
175
+
176
+ # list all the available assistantbench tasks
177
+ env_ids = [id for id in gym.envs.registry.keys() if id.startswith("browsergym/assistantbench")]
178
+ print("\n".join(env_ids))
179
+ ```
180
+
181
+ ## 💻 Demo
182
+
183
+ If you want to experiment with a demo agent in BrowserGym, follow these steps
184
+ ```sh
185
+ # conda setup
186
+ conda env create -f demo_agent/environment.yml
187
+ conda activate demo_agent
188
+
189
+ # or pip setup
190
+ pip install -r demo_agent/requirements.txt
191
+
192
+ # then download the browser for playwright
193
+ playwright install chromium
194
+ ```
195
+
196
+ Our demo agent uses `openai` as a backend, so be sure to set your `OPENAI_API_KEY` environment variable.
197
+
198
+ Launch the demo agent as follows
199
+ ```sh
200
+ # openended (interactive chat mode)
201
+ python demo_agent/run_demo.py --task_name openended --start_url https://www.google.com
202
+
203
+ # miniwob
204
+ python demo_agent/run_demo.py --task_name miniwob.click-test
205
+
206
+ # workarena
207
+ python demo_agent/run_demo.py --task_name workarena.servicenow.order-standard-laptop
208
+
209
+ # webarena
210
+ python demo_agent/run_demo.py --task_name webarena.4
211
+
212
+ # visualwebarena
213
+ python demo_agent/run_demo.py --task_name visualwebarena.398
214
+ ```
215
+
216
+ You can customize your experience by changing the `model_name` to your preferred LLM (it uses `gpt-4o-mini` by default), adding screenshots for your VLMs with `use_screenshot`, and much more!
217
+
218
+ ```sh
219
+ python demo_agent/run_demo.py --help
220
+ ```
221
+
222
+ ## 🌐 Ecosystem
223
+
224
+ - [AgentLab](https://github.com/ServiceNow/AgentLab): Seamlessly run agents on benchmarks, collect and analyse traces.
225
+ - [WorkArena(++)](https://github.com/ServiceNow/WorkArena): A benchmark for web agents on the ServiceNow platform.
226
+ - [WebArena](https://github.com/web-arena-x/webarena): A benchmark of realistic web tasks on self-hosted domains.
227
+ - [VisualWebArena](https://github.com/web-arena-x/visualwebarena): A benchmark of realistic visual web tasks on self-hosted domains.
228
+ - [MiniWoB(++)](https://miniwob.farama.org/): A collection of over 100 web tasks on synthetic web pages.
229
+ - [WebLINX](https://github.com/McGill-NLP/weblinx): A dataset of real-world web interaction traces.
230
+ - [AssistantBench](https://github.com/oriyor/assistantbench): A benchmark of realistic and time-consuming tasks on the open web.
231
+ - [DoomArena](https://github.com/ServiceNow/DoomArena): A framework for AI agent security testing which supports injecting attacks into web pages from Browsergym environments.
232
+
233
+ ## 🌟 Contributors
234
+
235
+ [![BrowserGym contributors](https://contrib.rocks/image?repo=ServiceNow/BrowserGym&max=2000)](https://github.com/ServiceNow/BrowserGym/graphs/contributors)
236
+
237
+ ## 📝 Citing This Work
238
+
239
+ Please use the following BibTeX to cite our work:
240
+ ```tex
241
+ @inproceedings{workarena2024,
242
+ title = {{W}ork{A}rena: How Capable are Web Agents at Solving Common Knowledge Work Tasks?},
243
+ author = {Drouin, Alexandre and Gasse, Maxime and Caccia, Massimo and Laradji, Issam H. and Del Verme, Manuel and Marty, Tom and Vazquez, David and Chapados, Nicolas and Lacoste, Alexandre},
244
+ booktitle = {Proceedings of the 41st International Conference on Machine Learning},
245
+ pages = {11642--11662},
246
+ year = {2024},
247
+ editor = {Salakhutdinov, Ruslan and Kolter, Zico and Heller, Katherine and Weller, Adrian and Oliver, Nuria and Scarlett, Jonathan and Berkenkamp, Felix},
248
+ volume = {235},
249
+ series = {Proceedings of Machine Learning Research},
250
+ month = {21--27 Jul},
251
+ publisher = {PMLR},
252
+ url = {https://proceedings.mlr.press/v235/drouin24a.html},
253
+ }
254
+ ```
BrowserGym/browsergym/assistantbench/README.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AssistantBench <> BrowserGym
2
+
3
+ This package provides an implementation for using the [AssistantBench](https://assistantbench.github.io/) benchmark in BrowserGym.
4
+
5
+ Because AssistantBench includes open-ended tasks, setup is extremely easy and simply requires installing the package.
6
+
7
+ Please note that AssistantBench has a hidden test set, so test set predictions will need to be uploaded to the official [leaderboard](https://huggingface.co/spaces/AssistantBench/leaderboard).
8
+
9
+ ## Setting up
10
+
11
+ - Install the package (this is still a wip)
12
+ ```
13
+ pip install browsergym-assistantbench
14
+ ```
15
+
16
+ - Run inference, e.g., run the following commands for demo on a simple toy task
17
+ ```
18
+ python demo_agent/run_demo.py --task_name assistantbench.validation.3
19
+ ```
20
+
21
+ - Test set predictions will be saved to `./assistantbench-predictions-test.jsonl`. To evaluate on the official test set, upload these predictions to the official [leaderboard](https://huggingface.co/spaces/AssistantBench/leaderboard).
BrowserGym/browsergym/assistantbench/pyproject.toml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["hatchling", "hatch-requirements-txt"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "browsergym-assistantbench"
7
+ description = "AssistantBench benchmark for BrowserGym"
8
+ authors = [
9
+ {name = "Ori Yoran"},
10
+ {name = "Maxime Gasse"},
11
+ ]
12
+ readme = "README.md"
13
+ requires-python = ">3.7"
14
+ license = {text = "Apache-2.0"}
15
+ classifiers = [
16
+ "Development Status :: 3 - Alpha",
17
+ "Programming Language :: Python :: 3",
18
+ "Operating System :: OS Independent",
19
+ "Intended Audience :: Science/Research",
20
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
21
+ "License :: OSI Approved :: Apache Software License",
22
+ ]
23
+ dynamic = ["dependencies", "version"]
24
+
25
+ [project.urls]
26
+ homepage = "https://github.com/ServiceNow/BrowserGym"
27
+
28
+ [tool.hatch.version]
29
+ path = "../core/src/browsergym/core/__init__.py"
30
+
31
+ [tool.hatch.metadata.hooks.requirements_txt]
32
+ files = ["requirements.txt"]
33
+
34
+ [tool.hatch.build.targets.wheel]
35
+ packages = ["src/browsergym"]
BrowserGym/browsergym/assistantbench/requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ browsergym-core==0.13.4
2
+ datasets
3
+ scipy
4
+ numpy
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/__init__.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from browsergym.core.registration import register_task
2
+
3
+ from . import task
4
+
5
+ TOY_AB_TASK_IDS = []
6
+ VALID_AB_TASK_IDS = []
7
+ TEST_AB_TASK_IDS = []
8
+
9
+
10
+ # register a toy easy task for testing implementation
11
+ gym_id = f"assistantbench.imp.0"
12
+ register_task(
13
+ gym_id,
14
+ task.AssistantBenchTask,
15
+ task_kwargs={
16
+ "task_id": f"imp.0",
17
+ },
18
+ default_task_kwargs={
19
+ "save_predictions": False, # can be overriden
20
+ },
21
+ )
22
+ TOY_AB_TASK_IDS.append(gym_id)
23
+
24
+ # register the AssistantBench dev set
25
+ for task_id in range(33):
26
+ gym_id = f"assistantbench.validation.{task_id}"
27
+ register_task(
28
+ gym_id,
29
+ task.AssistantBenchTask,
30
+ task_kwargs={
31
+ "task_id": f"validation.{task_id}",
32
+ },
33
+ default_task_kwargs={
34
+ "save_predictions": False, # can be overriden
35
+ },
36
+ )
37
+ VALID_AB_TASK_IDS.append(gym_id)
38
+
39
+ # register the AssistantBench test set
40
+ for task_id in range(181):
41
+ gym_id = f"assistantbench.test.{task_id}"
42
+ register_task(
43
+ gym_id,
44
+ task.AssistantBenchTask,
45
+ task_kwargs={
46
+ "task_id": f"test.{task_id}",
47
+ },
48
+ default_task_kwargs={
49
+ "save_predictions": True, # can be overriden
50
+ },
51
+ )
52
+ TEST_AB_TASK_IDS.append(gym_id)
53
+
54
+ ALL_AB_TASK_IDS = TOY_AB_TASK_IDS + VALID_AB_TASK_IDS + TEST_AB_TASK_IDS
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_dicts.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List
2
+
3
+ import numpy as np
4
+
5
+ from .utils import _align_bags
6
+
7
+
8
+ def calculate_f1_score(precision, recall):
9
+ if precision + recall == 0:
10
+ return 0 # Handle the case to avoid division by zero
11
+ return 2 * (precision * recall) / (precision + recall)
12
+
13
+
14
+ def calc_recall(pred: Dict, gold: Dict, use_gold_for_eval: bool):
15
+ from .evaluate_factory import get_evaluator_from_gold_answer
16
+
17
+ recall = []
18
+ for gold_key, gold_value in gold.items():
19
+ pred_value = pred.get(gold_key)
20
+ gold_value = fix_number(gold_value)
21
+ pred_value = fix_number(pred_value)
22
+ if gold_key not in pred:
23
+ recall.append(0)
24
+ else:
25
+ evaluator = (
26
+ get_evaluator_from_gold_answer(type(gold_value))
27
+ if use_gold_for_eval
28
+ else get_evaluator_from_gold_answer(type(pred_value))
29
+ )
30
+ if type(pred_value) != type(gold_value):
31
+ recall.append(0)
32
+ continue
33
+ recall.append(evaluator(pred_value, gold_value))
34
+ avg_recall = np.average(recall)
35
+ return avg_recall
36
+
37
+
38
+ def fix_number(number):
39
+
40
+ if type(number) == str:
41
+ copy_ans = number
42
+ copy_ans = " ".join(
43
+ " ".join(" ".join(copy_ans.split("$")).split("%")).split("sqft")
44
+ ).strip()
45
+ copy_ans = copy_ans.strip()
46
+ copy_ans = copy_ans.replace(",", ".")
47
+ try:
48
+ return float(copy_ans)
49
+ except:
50
+ return number
51
+ elif type(number) == int:
52
+ return float(number)
53
+ else:
54
+ return number
55
+
56
+
57
+ def evaluate_pair_of_dicts(pred: Dict, gold: Dict):
58
+ recall = calc_recall(pred, gold, True)
59
+ precision = calc_recall(gold, pred, False)
60
+ f1 = calculate_f1_score(precision, recall)
61
+ return f1
62
+
63
+
64
+ def evaluate_dicts(pred: List[Dict], gold: List[Dict]):
65
+ if not (type(pred) == dict or len(pred) == 0 or (type(pred) == list and type(pred[0]) == dict)):
66
+ return 0
67
+ max_alignment_scores = _align_bags(pred, gold, evaluate_pair_of_dicts)
68
+ return np.average(max_alignment_scores)
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_factory.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Union
2
+
3
+ from .evaluate_dicts import evaluate_dicts
4
+ from .evaluate_numbers import evaluate_numbers
5
+ from .evaluate_strings import evaluate_strings
6
+
7
+ EvaluatorFactory = {
8
+ "string": evaluate_strings,
9
+ "number": evaluate_numbers,
10
+ "json": evaluate_dicts,
11
+ "string list": evaluate_strings,
12
+ }
13
+
14
+ EvaluatorFactoryFromType = {
15
+ str: evaluate_strings,
16
+ int: evaluate_numbers,
17
+ float: evaluate_numbers,
18
+ bool: evaluate_strings,
19
+ list: evaluate_strings,
20
+ }
21
+
22
+
23
+ def get_evaluator(evaluator: str):
24
+ return EvaluatorFactory[evaluator]
25
+
26
+
27
+ def get_evaluator_from_gold_answer(gold_answer: Union[str, int, float]):
28
+ return EvaluatorFactoryFromType[gold_answer]
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_numbers.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Union
2
+
3
+ import numpy as np
4
+
5
+
6
+ # Renamed calc_z function to distance_function_log
7
+ def distance_function_log(pred: float, gold: float):
8
+ if pred == gold == 0:
9
+ return 1
10
+ if pred == 0:
11
+ pred = 1e-4
12
+ if gold == 0:
13
+ gold = 1e-4
14
+ if pred > gold:
15
+ return max(0, 1 - np.log(pred / gold))
16
+ else:
17
+ return max(0, 1 - np.log(gold / pred))
18
+
19
+
20
+ def evaluate_numbers(pred: Union[float, str], gold: float):
21
+ res = None
22
+ if type(pred) != float and type(pred) != int:
23
+ try:
24
+ pred = float(pred)
25
+ except ValueError:
26
+ res = 0
27
+ if type(gold) != float and type(gold) != int:
28
+ try:
29
+ gold = float(gold)
30
+ except ValueError:
31
+ res = 0
32
+ if res is None:
33
+ res = distance_function_log(pred, gold)
34
+ return res
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_strings.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Evaluation for two strings or list of strings.
3
+ Code taken from the DROP benchmark - https://github.com/allenai/allennlp-reading-comprehension/blob/master/allennlp_rc/eval/drop_eval.py
4
+ """
5
+
6
+ import re
7
+ import string
8
+ from typing import List, Set, Tuple, Union
9
+
10
+ import numpy as np
11
+ from scipy.optimize import linear_sum_assignment
12
+
13
+
14
+ # From here through _normalize_answer was originally copied from:
15
+ # https://worksheets.codalab.org/rest/bundles/0x6b567e1cf2e041ec80d7098f031c5c9e/contents/blob/
16
+ # Then cleaned up and modified a bit.
17
+ def _remove_articles(text: str) -> str:
18
+ regex = re.compile(r"\b(a|an|the)\b", re.UNICODE)
19
+ return re.sub(regex, " ", text)
20
+
21
+
22
+ def _white_space_fix(text: str) -> str:
23
+ return " ".join(text.split())
24
+
25
+
26
# Characters removed by the punctuation-stripping step (ASCII punctuation).
EXCLUDE = {*string.punctuation}
27
+
28
+
29
def _remove_punc(text: str) -> str:
    """
    Drop every character listed in ``EXCLUDE`` from ``text``.

    Text that parses as a number is returned untouched, so its sign and decimal point survive.
    """
    if _is_number(text):
        return text
    kept_chars = [ch for ch in text if ch not in EXCLUDE]
    return "".join(kept_chars)
34
+
35
+
36
+ def _lower(text: str) -> str:
37
+ return text.lower()
38
+
39
+
40
+ def _tokenize(text: str) -> List[str]:
41
+ return re.split(" |-", text)
42
+
43
+
44
def _normalize_answer(text: str) -> str:
    """Lower text and remove punctuation, articles and extra whitespace, token by token."""
    cleaned_tokens = []
    for token in _tokenize(text):
        piece = _lower(token)
        piece = _remove_punc(piece)
        piece = _normalize_number(piece)
        piece = _remove_articles(piece)
        piece = _white_space_fix(piece)
        # Tokens that normalize to nothing (e.g. bare articles) are dropped.
        if piece.strip():
            cleaned_tokens.append(piece)
    return " ".join(cleaned_tokens).strip()
54
+
55
+
56
+ def _is_number(text: str) -> bool:
57
+ try:
58
+ float(text)
59
+ return True
60
+ except ValueError:
61
+ return False
62
+
63
+
64
def _normalize_number(text: str) -> str:
    """Rewrite numeric text in canonical ``str(float(...))`` form; leave other text unchanged."""
    return str(float(text)) if _is_number(text) else text
69
+
70
+
71
def _answer_to_bags(
    answer: Union[str, List[str], Tuple[str, ...]]
) -> Tuple[List[str], List[Set[str]]]:
    """
    Normalize an answer and turn it into token bags.

    A single string is treated as a one-element list of spans.

    Returns:
        ``(normalized_spans, token_bags)``: the normalized text of each span, and the set of
        whitespace-separated tokens of each normalized span (same order).
    """
    raw_spans = answer if isinstance(answer, (list, tuple)) else [answer]
    normalized_spans: List[str] = [_normalize_answer(span) for span in raw_spans]
    token_bags = [set(span.split()) for span in normalized_spans]
    return normalized_spans, token_bags
85
+
86
+
87
def _align_bags(predicted: List[Set[str]], gold: List[Set[str]]) -> List[float]:
    """
    Find the optimal 1-1 alignment between gold and predicted token bags and return the F1
    obtained for each aligned pair.

    A pair only receives a non-zero score when ``_match_numbers_if_present`` accepts it.
    The result has ``max(len(gold), len(predicted))`` entries, indexed by gold position;
    entries without an aligned pair stay at 0.
    """
    gold_count, pred_count = len(gold), len(predicted)
    scores = np.zeros([gold_count, pred_count])
    for g_idx, gold_bag in enumerate(gold):
        for p_idx, pred_bag in enumerate(predicted):
            if not _match_numbers_if_present(gold_bag, pred_bag):
                continue
            scores[g_idx, p_idx] = _compute_f1(pred_bag, gold_bag)

    # linear_sum_assignment minimizes cost, so negate to maximize total F1.
    rows, cols = linear_sum_assignment(-scores)

    best = np.zeros([max(gold_count, pred_count)])
    for g_idx, p_idx in zip(rows, cols):
        best[g_idx] = max(best[g_idx], scores[g_idx, p_idx])
    return best
103
+
104
+
105
+ def _compute_f1(predicted_bag: Set[str], gold_bag: Set[str]) -> float:
106
+ intersection = len(gold_bag.intersection(predicted_bag))
107
+ if not predicted_bag:
108
+ precision = 1.0
109
+ else:
110
+ precision = intersection / float(len(predicted_bag))
111
+ if not gold_bag:
112
+ recall = 1.0
113
+ else:
114
+ recall = intersection / float(len(gold_bag))
115
+ f1 = (
116
+ (2 * precision * recall) / (precision + recall)
117
+ if not (precision == 0.0 and recall == 0.0)
118
+ else 0.0
119
+ )
120
+ return f1
121
+
122
+
123
+ def _match_numbers_if_present(gold_bag: Set[str], predicted_bag: Set[str]) -> bool:
124
+ gold_numbers = set()
125
+ predicted_numbers = set()
126
+ for word in gold_bag:
127
+ if _is_number(word):
128
+ gold_numbers.add(word)
129
+ for word in predicted_bag:
130
+ if _is_number(word):
131
+ predicted_numbers.add(word)
132
+ if (not gold_numbers) or gold_numbers.intersection(predicted_numbers):
133
+ return True
134
+ return False
135
+
136
+
137
def get_metrics(
    predicted: Union[str, List[str], Tuple[str, ...]],
    gold: Union[str, List[str], Tuple[str, ...]],
) -> Tuple[float, float]:
    """
    Compute exact match and the DROP F1 metric for a prediction against a gold answer.

    Both arguments are either a string or a list/tuple of strings. Exact match is 1.0 when
    the normalized spans of both sides form the same set and have the same count. F1 is the
    mean of the per-bag aligned F1 scores, rounded to two decimals.

    Returns:
        ``(exact_match, f1)``.
    """
    predicted_spans, predicted_token_bags = _answer_to_bags(predicted)
    gold_spans, gold_token_bags = _answer_to_bags(gold)

    spans_agree = set(predicted_spans) == set(gold_spans) and len(predicted_spans) == len(
        gold_spans
    )
    exact_match = 1.0 if spans_agree else 0.0

    per_bag_f1 = _align_bags(predicted_token_bags, gold_token_bags)
    f1 = round(np.mean(per_bag_f1), 2)
    return exact_match, f1
160
+
161
+
162
def evaluate_strings(prediction, gold):
    """
    Score a string (or list-of-strings) prediction against a gold answer with the DROP F1.

    Values that are neither ``list`` nor ``str`` are converted with ``str()`` first. Any
    exception raised while computing the metric is treated as a score of 0.0 (best effort).

    Returns:
        The mean of the per-bag aligned F1 scores, or 0.0 on failure.
    """
    if type(prediction) not in (list, str):
        prediction = str(prediction)
    if type(gold) not in (list, str):
        gold = str(gold)
    try:
        _, predicted_token_bags = _answer_to_bags(prediction)
        _, gold_token_bags = _answer_to_bags(gold)
        per_bag_f1 = _align_bags(predicted_token_bags, gold_token_bags)
        return np.mean(per_bag_f1)
    except Exception:
        return 0.0
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/utils.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Callable, List, Set
2
+
3
+ import numpy as np
4
+ from scipy.optimize import linear_sum_assignment
5
+
6
+
7
+ def _align_bags(
8
+ predicted: List[Set[str]],
9
+ gold: List[Set[str]],
10
+ method: Callable[[object, object], float],
11
+ ) -> List[float]:
12
+ """
13
+ Takes gold and predicted answer sets and first finds the optimal 1-1 alignment
14
+ between them and gets maximum metric values over all the answers.
15
+ """
16
+ scores = np.zeros([len(gold), len(predicted)])
17
+ for gold_index, gold_item in enumerate(gold):
18
+ for pred_index, pred_item in enumerate(predicted):
19
+ scores[gold_index, pred_index] = method(pred_item, gold_item)
20
+ row_ind, col_ind = linear_sum_assignment(-scores)
21
+
22
+ max_scores = np.zeros([max(len(gold), len(predicted))])
23
+ for row, column in zip(row_ind, col_ind):
24
+ max_scores[row] = max(max_scores[row], scores[row, column])
25
+ return max_scores
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluator.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # todo export evaluation to a python package
2
+
3
+ import json
4
+
5
+ import numpy as np
6
+
7
+ from .evaluate_utils.evaluate_factory import get_evaluator
8
+
9
+
10
def find_isnan(samp):
    """
    Tell whether ``samp`` is NaN according to ``np.isnan``.

    Values that ``np.isnan`` cannot handle (strings, ``None``, dicts, ...) and arrays whose
    truth value is ambiguous are reported as not-NaN. Only ``Exception`` subclasses are
    swallowed, so ``KeyboardInterrupt``/``SystemExit`` still propagate.

    Returns:
        True if ``samp`` is NaN, False otherwise.
    """
    try:
        return bool(np.isnan(samp))
    except Exception:
        return False
18
+
19
+
20
def fix_ans(answer):
    """
    Rewrite a Python-dict-looking string so that it uses JSON double quotes.

    Only the quote patterns around braces, commas and colons are replaced
    (``{'``, ``', '``, ``': '``, ``'}``, ``': ``); other single quotes are left alone.
    Values without a compatible ``replace`` method (non-strings) are returned unchanged.
    """
    try:
        answer = (
            answer.replace("{'", '{"')
            .replace("', '", '", "')
            .replace("': '", '": "')
            .replace("'}", '"}')
        )
        # Handles keys followed by a non-string value, e.g. {'a': 1}.
        answer = answer.replace("': ", '": ')
        return answer
    except (AttributeError, TypeError):
        return answer
32
+
33
+
34
def parse_answer(answer):
    """
    Parse a list of gold-answer lines and decide which evaluator applies.

    For a single line: a value accepted by ``fix_number`` is a "number"; otherwise a line
    that parses as JSON (after ``fix_ans``) is "json"; otherwise it is a "string".
    For any other number of lines: "json" if every line parses as JSON, else "string list".

    The original retried ``fix_number`` after a JSON failure; that call could never succeed
    because the same value had just been rejected, so it is removed.

    Args:
        answer: list of answer lines.

    Returns:
        ``(parsed_answer, evaluator_label)``.
    """
    if len(answer) == 1:
        single = answer[0]
        ans, is_num = fix_number(single)
        if is_num:
            return ans, "number"
        try:
            return [json.loads(fix_ans(single))], "json"
        except Exception:
            return single, "string"

    try:
        return [json.loads(fix_ans(ex)) for ex in answer], "json"
    except Exception:
        return answer, "string list"
54
+
55
+
56
def fix_number(number):
    """
    Try to interpret ``number`` as a float.

    For strings, the markers ``$``, ``%`` and ``sqft`` are replaced by spaces, the text is
    stripped, commas become decimal points (so ``"1,000"`` parses as 1.0) and a trailing
    `` square kilometers`` is removed before calling ``float()``.

    Returns:
        ``(value, is_number)``. Strings that do not parse are returned unchanged with False.
        Ints are returned as floats with True. Any other type is returned unchanged with
        True (this includes non-numeric values such as ``None``).
    """
    if type(number) == str:
        cleaned = number
        for marker in ("$", "%", "sqft"):
            cleaned = cleaned.replace(marker, " ")
        cleaned = cleaned.strip().replace(",", ".").replace(" square kilometers", "")
        try:
            return float(cleaned), True
        except ValueError:
            return number, False
    if type(number) == int:
        return float(number), True
    return number, True
72
+
73
+
74
def fix_prediction(prediction, gold_answer, evaluator):
    """
    Normalize a prediction so it can be handed to the selected evaluator.

    Steps:
      * A one-element list holding an int or an all-digit string is unwrapped to its number.
        ``fix_number`` returns ``(value, is_number)``; the original stored that whole tuple
        as the prediction, which then could not be scored as a number.
      * A non-list prediction goes through ``fix_number``; for the "json" evaluator it is
        then split on newlines and each line parsed as JSON (falling back to a one-element
        list holding the raw prediction).

    Args:
        prediction: raw or JSON-decoded prediction.
        gold_answer: parsed gold answer (only its type is inspected here).
        evaluator: evaluator label chosen for the gold answer.

    Returns:
        ``(prediction, run_eval)``. ``run_eval`` is False for an empty prediction, or for a
        multi-element list when the gold answer is a float.
    """
    is_single_numeric_item = (
        type(prediction) == list
        and len(prediction) == 1
        and (
            type(prediction[0]) == int
            or ((type(prediction[0]) == str) and prediction[0].isnumeric())
        )
    )
    if is_single_numeric_item:
        prediction, _ = fix_number(prediction[0])

    if type(prediction) != list:
        prediction, _ = fix_number(prediction)
        if evaluator == "json":
            try:
                prediction = [json.loads(pred) for pred in prediction.split("\n")]
            except Exception:
                # Not newline-separated JSON (or not a string at all): score it as one item.
                prediction = [prediction]

    if (hasattr(type(prediction), "__len__")) and (len(prediction) == 0):
        return prediction, False

    if (type(prediction) == list and len(prediction) > 1) and type(gold_answer) == float:
        return prediction, False

    return prediction, True
100
+
101
+
102
def _is_blank_prediction(value) -> bool:
    """Tell whether ``value`` carries no answer: None, NaN, or an empty sized object."""
    if value is None or find_isnan(value):
        return True
    # Unsized values (bool, int, float) are never "empty"; avoids len() on them.
    return hasattr(value, "__len__") and len(value) == 0


def question_scorer(prediction, gold_answer):
    """
    Score a prediction against a gold answer.

    The prediction is JSON-decoded when possible. The gold answer (a list, or a
    newline-separated string whose blank lines are ignored) is parsed to choose an
    evaluator, and the prediction is normalized accordingly.

    Predictions that decode to unsized values such as JSON ``null``/``true`` no longer
    raise ``TypeError`` from ``len()``; a ``None`` prediction is reported as unanswered and
    scored 0.0.

    Args:
        prediction: model output (usually a string).
        gold_answer: gold answer as a string or a list of answer lines.

    Returns:
        ``(accuracy, has_ans)`` where ``has_ans`` is 0.0 when the prediction is empty/NaN
        (or a list made only of such items) and 1.0 otherwise.
    """
    try:
        prediction = json.loads(prediction)
    except Exception:
        pass  # not JSON: keep the raw prediction

    if type(gold_answer) != list:
        answer_list = [x for x in gold_answer.split("\n") if len(x.strip()) > 0]
    else:
        answer_list = gold_answer
    gold_answer, evaluator = parse_answer(answer_list)
    prediction, run_eval = fix_prediction(prediction, gold_answer, evaluator)

    has_ans = 0.0 if _is_blank_prediction(prediction) else 1.0
    if type(prediction) == list and all(_is_blank_prediction(pred) for pred in prediction):
        has_ans = 0.0

    if not run_eval or prediction is None:
        return 0.0, has_ans

    metric_eval = get_evaluator(evaluator)
    accuracy = metric_eval(prediction, gold_answer)
    return accuracy, has_ans
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/task.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ from typing import Dict, Tuple
4
+
5
+ from datasets import load_dataset
6
+ from playwright.sync_api import Page
7
+
8
+ from browsergym.core.task import AbstractBrowserTask
9
+
10
+ from .evaluation.evaluator import question_scorer
11
+ from .utils import add_prediction_to_jsonl
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
# Process-wide fallback path for prediction output; None until explicitly set.
_DEFAULT_OUTPUT_FILE = None


def set_default_output_file(output_file: str):
    """Record ``output_file`` as the module-wide default output path."""
    globals()["_DEFAULT_OUTPUT_FILE"] = output_file


def get_default_output_file():
    """Return the module-wide default output path (None if it was never set)."""
    current_default = _DEFAULT_OUTPUT_FILE
    return current_default
25
+
26
+
27
+ # Load dataset
28
+
29
# Dataset identifier passed to `load_dataset`.
DATA_DATASET = "AssistantBench/AssistantBench"
# NOTE(review): this runs at import time, and trust_remote_code=True allows the dataset
# repository to execute its own loading script locally - confirm the source is trusted.
all_tasks = load_dataset(path=DATA_DATASET, trust_remote_code=True)
31
+
32
+
33
+ # Extract answers and tasks for validation and test splits
34
def extract_data(split_name: str) -> Tuple[Dict[str, str], Dict[str, str], Dict[str, str]]:
    """
    Build the lookup tables for one split of ``all_tasks``.

    Rows are keyed ``"<split_name>.<row index>"``.

    Returns:
        ``(answers, tasks, ids)``: the row's ``"answer"`` (``""`` when it is None), its
        ``"task"`` and its ``"id"``, each as a dict keyed as described above.
    """
    answers: Dict[str, str] = {}
    questions: Dict[str, str] = {}
    original_ids: Dict[str, str] = {}
    for index, row in enumerate(all_tasks[split_name]):
        key = f"{split_name}.{index}"
        answers[key] = row["answer"] if row["answer"] is not None else ""
        questions[key] = row["task"]
        original_ids[key] = row["id"]
    return answers, questions, original_ids
43
+
44
+
45
+ # Implementation data for testing
46
def get_implementation_testing_data() -> Tuple[Dict[str, str], Dict[str, str], Dict[str, str]]:
    """
    Return the hard-coded ``imp.0`` entry used to test the implementation.

    Returns:
        ``(answers, tasks, ids)`` dicts, each holding the single key ``"imp.0"``.
    """
    answers = {"imp.0": "20"}
    questions = {
        "imp.0": "What is the weather in Paris yesterday in Celsius? Answer with the number only."
    }
    original_ids = {"imp.0": "test_imp_id_0"}
    return answers, questions, original_ids
54
+
55
+
56
+ # Combine dev, test, and implementation-specific testing splits
57
# Per-split lookup tables (validation, test, and the implementation-testing entry).
gold_answers_dev, tasks_dev, ids_dev = extract_data("validation")
gold_answers_test, tasks_test, ids_test = extract_data("test")
(
    gold_answers_impl_testing,
    tasks_test_impl_testing,
    ids_imp_testing,
) = get_implementation_testing_data()

# Merged tables; on a key collision the later split wins.
gold_answers = {}
for _split_answers in (gold_answers_dev, gold_answers_test, gold_answers_impl_testing):
    gold_answers.update(_split_answers)

tasks = {}
for _split_tasks in (tasks_dev, tasks_test, tasks_test_impl_testing):
    tasks.update(_split_tasks)

ids = {}
for _split_ids in (ids_dev, ids_test, ids_imp_testing):
    ids.update(_split_ids)
65
+
66
+
67
class AssistantBenchTask(AbstractBrowserTask):
    """
    BrowserGym task for one AssistantBench question.

    The goal and gold answer are looked up in the module-level ``tasks`` / ``gold_answers``
    tables; the agent's last assistant message is scored with ``question_scorer``.
    """

    @classmethod
    def get_task_id(cls) -> str:
        """
        Generic class for several task ids, this way of obtaining the task id is not compatible for now.
        """
        raise NotImplementedError

    def __init__(
        self, seed: int, task_id: str, output_file: str = None, save_predictions: bool = False
    ) -> None:
        """
        Args:
            seed (int): Random seed for task initialization.
            task_id (str): Unique identifier for the task (for the BrowserGym environment).
            output_file (str, optional): Path to the output file for saving results, needed for test set.
            save_predictions (bool, optional): Save predictions to the output file (yes/no).

        Raises:
            KeyError: if ``task_id`` is not a known task key.
        """
        super().__init__(seed)
        self.locale = "en-US"
        self.timezone_id = "America/New_York"

        self.task_id = task_id
        self.start_url = "https://google.com"
        # All three tables share the same string keys, so coerce consistently.
        task_key = str(self.task_id)
        self.goal = tasks[task_key]
        self.gold = gold_answers[task_key]
        self.ab_task_id = ids[task_key]
        self.save_predictions = save_predictions

        self.output_file = output_file

        # set output_file using the global default value, if not provided in constructor
        if not self.output_file:
            self.output_file = get_default_output_file()
        # use env variable in last resort
        if not self.output_file:
            self.output_file = os.getenv("ASSISTANTBENCH_OUTPUT_FILE", None)

        if self.save_predictions and self.output_file:
            logger.info(f"Task prediction will be written to output file {self.output_file}")

    def setup(self, page: Page) -> Tuple[str, dict]:
        """
        Navigate to the start URL and, when predictions are saved, reserve this task's entry
        in the output file.

        Returns:
            ``(goal, info)`` where ``info`` is an empty dict.
        """
        logger.info(f"Navigating to start url: {self.start_url}")
        page.goto(self.start_url, timeout=50000)
        if self.save_predictions and self.output_file:
            # create an empty task entry in the output file (will raise an Exception if the entry is already there)
            add_prediction_to_jsonl(
                file_path=self.output_file,
                task_id=self.ab_task_id,
                prediction="",
                override_if_exists=False,
            )
        return self.goal, {}

    def teardown(self) -> None:
        """Nothing to clean up for this task."""
        pass

    def validate(self, page: Page, chat_messages: list[dict]) -> Tuple[float, bool, str, dict]:
        """
        Score the episode once the agent has answered.

        The task is done as soon as the last chat message has role ``"assistant"``; its
        ``"message"`` is taken as the prediction, optionally written to the output file,
        and scored against the gold answer.

        Returns:
            ``(accuracy, done, msg, info)``; ``msg`` is always ``""`` and ``info`` ``{}``.
        """
        accuracy, done, msg, info = 0.0, False, "", {}

        # eval when the agent returns a response
        if chat_messages and chat_messages[-1]["role"] == "assistant":
            done = True
            prediction = chat_messages[-1]["message"]
            if self.save_predictions and self.output_file:
                # update the task entry in the output file
                add_prediction_to_jsonl(
                    file_path=self.output_file,
                    task_id=self.ab_task_id,
                    prediction=prediction,
                    override_if_exists=True,
                )
            accuracy, _ = question_scorer(prediction, self.gold)

        return accuracy, done, msg, info
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/utils.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ import os
4
+ import pathlib
5
+ import time
6
+
7
logger = logging.getLogger(__name__)


def add_prediction_to_jsonl(
    file_path: str, task_id: str, prediction: object, override_if_exists: bool
) -> None:
    """
    Multiprocessing-safe file write.

    Adds (or updates) the ``{"id": task_id, "answer": prediction}`` record of a JSONL file
    while holding a lock file created next to it (same path, ``.lock`` suffix).

    The lock is now released in a ``finally`` block. Previously any exception raised while
    the lock was held (including the ``ValueError`` below) left the lock file behind, so
    every later call timed out.

    Args:
        file_path: path of the JSONL file; created if missing.
        task_id: value of the record's ``"id"`` field.
        prediction: JSON-serializable value stored under ``"answer"``.
        override_if_exists: replace the answer of an existing record instead of failing.

    Raises:
        RuntimeError: if the lock cannot be acquired within 10 seconds.
        ValueError: if a record for ``task_id`` exists and ``override_if_exists`` is False.
    """
    lock_file_path = pathlib.Path(file_path).with_suffix(".lock")
    lock_max_wait = 10  # 10 seconds

    # Acquire lock (atomic file creation)
    start_time = time.time()
    while True:
        try:
            fd = os.open(lock_file_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
            with os.fdopen(fd, "w") as f:
                f.write("lock")
            break
        except FileExistsError:
            # give up if max wait time reached
            seconds_waited = time.time() - start_time
            if seconds_waited >= lock_max_wait:
                raise RuntimeError(
                    f"Lock file could not be acquired after {seconds_waited} seconds ({lock_file_path})"
                )
            # wait for lock release
            logger.info(f"Waiting for lock file to be released: {lock_file_path}")
            time.sleep(1)  # 1 sec

    logger.info(f"Lock file acquired: {lock_file_path}")

    try:
        # Load existing data, if any (a missing file is created by the write below)
        data = []
        if os.path.exists(file_path):
            with open(file_path, "r") as f:
                data.extend([json.loads(line) for line in f if line.strip()])  # Skip empty lines

        # Check if task_id already exists
        existing_record = next((entry for entry in data if entry["id"] == task_id), None)

        # Add or update the record
        if not existing_record:
            # Add new record
            data.append({"id": task_id, "answer": prediction})
        elif override_if_exists:
            # Update existing record
            existing_record["answer"] = prediction
        else:
            raise ValueError(
                f"Prediction for task ID {repr(task_id)} already exists in file {file_path}."
            )

        # Write data back to the file
        with open(file_path, "w") as f:
            for entry in data:
                f.write(json.dumps(entry) + "\n")
    finally:
        # Release lock (remove file), even when the update failed
        os.remove(lock_file_path)
        logger.info(f"Lock file released: {lock_file_path}")
BrowserGym/browsergym/browsergym.egg-info/PKG-INFO ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.4
2
+ Name: browsergym
3
+ Version: 0.13.4
4
+ Summary: BrowserGym: a gym environment for web task automation in the Chromium browser
5
+ Author: Rim Assouel, Léo Boisvert, Massimo Caccia, Alex Drouin, Maxime Gasse, Imene Kerboua, Alex Lacoste, Thibault Le Sellier De Chezelles, Tom Marty, Aman Jaiswal
6
+ License: Apache-2.0
7
+ Classifier: Development Status :: 3 - Alpha
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Intended Audience :: Science/Research
11
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
12
+ Classifier: License :: OSI Approved :: Apache Software License
13
+ Requires-Python: >3.10
14
+ Description-Content-Type: text/markdown
15
+ Requires-Dist: browsergym-core==0.13.4
16
+ Requires-Dist: browsergym-miniwob==0.13.4
17
+ Requires-Dist: browsergym-webarena==0.13.4
18
+ Requires-Dist: browsergym-visualwebarena==0.13.4
19
+ Requires-Dist: browsergym-assistantbench==0.13.4
20
+ Requires-Dist: browsergym-experiments==0.13.4
21
+ Requires-Dist: browsergym-workarena>=0.4.1
22
+ Requires-Dist: weblinx-browsergym>=0.0.2
BrowserGym/browsergym/browsergym.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ pyproject.toml
2
+ browsergym.egg-info/PKG-INFO
3
+ browsergym.egg-info/SOURCES.txt
4
+ browsergym.egg-info/dependency_links.txt
5
+ browsergym.egg-info/requires.txt
6
+ browsergym.egg-info/top_level.txt
BrowserGym/browsergym/browsergym.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
 
 
1
+
BrowserGym/browsergym/browsergym.egg-info/requires.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ browsergym-core==0.13.4
2
+ browsergym-miniwob==0.13.4
3
+ browsergym-webarena==0.13.4
4
+ browsergym-visualwebarena==0.13.4
5
+ browsergym-assistantbench==0.13.4
6
+ browsergym-experiments==0.13.4
7
+ browsergym-workarena>=0.4.1
8
+ weblinx-browsergym>=0.0.2
BrowserGym/browsergym/browsergym.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
 
 
1
+
BrowserGym/browsergym/core/README.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # BrowserGym core
2
+
3
+ This package provides `browsergym.core`, the core functionality of [BrowserGym](https://github.com/ServiceNow/BrowserGym).
4
+
5
+ ## Setup
6
+
7
+ 1. Install the package
8
+ ```sh
9
+ pip install browsergym-core
10
+ ```
BrowserGym/browsergym/core/pyproject.toml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["hatchling", "hatch-requirements-txt"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "browsergym-core"
7
+ description = "BrowserGym: a gym environment for web task automation in the Chromium browser"
8
+ authors = [
9
+ {name = "Rim Assouel"},
10
+ {name = "Léo Boisvert"},
11
+ {name = "Massimo Caccia"},
12
+ {name = "Alex Drouin"},
13
+ {name = "Maxime Gasse"},
14
+ {name = "Imene Kerboua"},
15
+ {name = "Alex Lacoste"},
16
+ {name = "Thibault Le Sellier De Chezelles"},
17
+ {name = "Tom Marty"},
18
+ ]
19
+ readme = "README.md"
20
+ requires-python = ">3.9"
21
+ license = {text = "Apache-2.0"}
22
+ classifiers = [
23
+ "Development Status :: 3 - Alpha",
24
+ "Programming Language :: Python :: 3",
25
+ "Operating System :: OS Independent",
26
+ "Intended Audience :: Science/Research",
27
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
28
+ "License :: OSI Approved :: Apache Software License",
29
+ ]
30
+ dynamic = ["dependencies", "version"]
31
+
32
+ [project.urls]
33
+ homepage = "https://github.com/ServiceNow/BrowserGym"
34
+
35
+ [tool.hatch.version]
36
+ path = "src/browsergym/core/__init__.py"
37
+
38
+ [tool.hatch.metadata.hooks.requirements_txt]
39
+ files = ["requirements.txt"]
40
+
41
+ [tool.hatch.build.targets.wheel]
42
+ packages = ["src/browsergym"]
BrowserGym/browsergym/core/requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ playwright==1.44
2
+ gymnasium>=0.27
3
+ numpy>=1.14
4
+ pyparsing>=3
5
+ Pillow>=10.1
6
+ beautifulsoup4>=4.12
7
+ lxml>=4.9
8
+ mcp[cli]>=1.6.0
BrowserGym/browsergym/core/src/browsergym/core/__init__.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __version__ = "0.13.4"
2
+
3
+ import playwright.sync_api
4
+
5
+ # we use a global playwright instance
6
# Module-wide playwright instance; stays None until first requested or explicitly set.
_PLAYWRIGHT = None


def _set_global_playwright(pw: playwright.sync_api.Playwright):
    """Store ``pw`` as the module-wide playwright instance."""
    globals()["_PLAYWRIGHT"] = pw


def _get_global_playwright():
    """
    Return the module-wide playwright instance.

    When none is stored yet, one is started with ``sync_playwright().start()`` and stored
    before being returned.
    """
    if _PLAYWRIGHT:
        return _PLAYWRIGHT

    started = playwright.sync_api.sync_playwright().start()
    _set_global_playwright(started)
    return _PLAYWRIGHT
21
+
22
+
23
+ # register the open-ended task
24
+ from .registration import register_task
25
+ from .task import OpenEndedTask
26
+
27
+ register_task(OpenEndedTask.get_task_id(), OpenEndedTask)
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (1.14 kB). View file
 
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/chat.cpython-311.pyc ADDED
Binary file (6.89 kB). View file
 
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/constants.cpython-311.pyc ADDED
Binary file (428 Bytes). View file
 
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/env.cpython-311.pyc ADDED
Binary file (31.2 kB). View file
 
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/observation.cpython-311.pyc ADDED
Binary file (22.7 kB). View file
 
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/registration.cpython-311.pyc ADDED
Binary file (3.49 kB). View file
 
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/spaces.cpython-311.pyc ADDED
Binary file (8.42 kB). View file
 
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/task.cpython-311.pyc ADDED
Binary file (5.53 kB). View file
 
BrowserGym/browsergym/core/src/browsergym/core/action/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Module-wide demo-mode flag; off by default.
_DEMO_MODE = False


def set_global_demo_mode(demo_mode: bool):
    """Store ``demo_mode`` as the module-wide demo-mode flag."""
    globals()["_DEMO_MODE"] = demo_mode


def get_global_demo_mode():
    """Return the module-wide demo-mode flag."""
    current_mode = _DEMO_MODE
    return current_mode
BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (561 Bytes). View file
 
BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/base.cpython-311.pyc ADDED
Binary file (3.12 kB). View file
 
BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/functions.cpython-311.pyc ADDED
Binary file (26.2 kB). View file
 
BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/highlevel.cpython-311.pyc ADDED
Binary file (12.4 kB). View file
 
BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/parsers.cpython-311.pyc ADDED
Binary file (6.82 kB). View file
 
BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/utils.cpython-311.pyc ADDED
Binary file (12.2 kB). View file
 
BrowserGym/browsergym/core/src/browsergym/core/action/base.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+
3
+ import playwright.sync_api
4
+
5
+ from . import get_global_demo_mode
6
+
7
+
8
class AbstractActionSet(ABC):
    """Abstract interface of an action set: describes actions and converts them to Python code."""

    def __init__(self, strict: bool = False):
        """Store the ``strict`` flag on the instance."""
        self.strict = strict

    @abstractmethod
    def describe(self, with_long_description: bool = True, with_examples: bool = True) -> str:
        """
        Produce a textual description of this action space.
        """

    @abstractmethod
    def example_action(self, abstract: bool) -> str:
        """
        Produce an example action, as a string.
        """

    @abstractmethod
    def to_python_code(self, action) -> str:
        """
        Convert ``action`` to browsergym-compatible python code.

        Args:
            action: the action to convert.

        Returns:
            Executable python code that performs the action in a browsergym environment.
        """
35
+
36
+
37
def execute_python_code(
    code: str,
    page: playwright.sync_api.Page,
    send_message_to_user: callable,
    report_infeasible_instructions: callable,
):
    """
    Run ``code`` with ``exec`` in a fresh global namespace that only exposes ``page``,
    ``send_message_to_user``, ``report_infeasible_instructions`` and ``DEMO_MODE``.

    WARNING: this is not safe!
    https://stackoverflow.com/questions/77655440/can-you-protect-a-python-variable-with-exec

    Args:
        code: the Python code to execute, as a string.
        page: the playwright page that will be made accessible to the code.
        send_message_to_user: utility function that will be made accessible to the code. It should take one text argument.
        report_infeasible_instructions: utility function that will be made accessible to the code. It should take one text argument.
    """
    exec_namespace = {}
    exec_namespace["page"] = page
    exec_namespace["send_message_to_user"] = send_message_to_user
    exec_namespace["report_infeasible_instructions"] = report_infeasible_instructions
    exec_namespace["DEMO_MODE"] = get_global_demo_mode()

    exec(code, exec_namespace)
BrowserGym/browsergym/core/src/browsergym/core/action/functions.py ADDED
@@ -0,0 +1,624 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # these are placeholders
2
+ # all these symbols will be available in browsergym actions
3
+ from typing import Literal
4
+
5
+ import playwright.sync_api
6
+
7
+ from .utils import (
8
+ add_demo_mode_effects,
9
+ call_fun,
10
+ get_elem_by_bid,
11
+ highlight_by_box,
12
+ smooth_move_visual_cursor_to,
13
+ )
14
+
15
# Placeholder for the playwright page the primitives below act on.
page: playwright.sync_api.Page = None
# Placeholder for the callable used by send_msg_to_user().
send_message_to_user: callable = None
# Placeholder for the callable used by report_infeasible().
report_infeasible_instructions: callable = None
# Placeholder demo-mode setting; the primitives compare it against "off".
demo_mode: Literal["off", "default", "all_blue", "only_visible_elements"] = None
# Placeholder flag forwarded to call_fun() by the element primitives.
retry_with_force: bool = False
20
+
21
+ """IMPORTANT
22
+ The following primitives are meant to be included in the browsergym action using
23
+ inspect.getsource().
24
+ """
25
+
26
+
27
# Thin wrapper: forwards `text` to the module-level `send_message_to_user` callable.
# NOTE(review): per the module notice this source is extracted with inspect.getsource(),
# so the docstring below is presumably consumed at runtime - keep it unchanged.
def send_msg_to_user(text: str):
    """
    Sends a message to the user.

    Examples:
        send_msg_to_user("Based on the results of my search, the city was built in 1751.")
    """
    send_message_to_user(text)
35
+
36
+
37
# Thin wrapper: forwards `reason` to the module-level `report_infeasible_instructions` callable.
def report_infeasible(reason: str):
    """
    Notifies the user that their instructions are infeasible.

    Examples:
        report_infeasible("I cannot follow these instructions because there is no email field in this form.")
    """
    report_infeasible_instructions(reason)
45
+
46
+
47
# Waits `wait_ms` milliseconds via the module-level `page` (page.wait_for_timeout).
def noop(wait_ms: float = 1000):
    """
    Do nothing, and optionally wait for the given time (in milliseconds).

    Examples:
        noop()
        noop(500)
    """
    page.wait_for_timeout(wait_ms)
56
+
57
+
58
+ # https://playwright.dev/docs/input#text-input
59
# Fix: with demo mode on, an empty `value` used to raise ZeroDivisionError when computing
# the per-character typing delay; the length is now floored at 1.
def fill(bid: str, value: str):
    """
    Fill out a form field. It focuses the element and triggers an input event with the entered text.
    It works for <input>, <textarea> and [contenteditable] elements.

    Examples:
        fill('237', 'example value')
        fill('45', "multi-line\\nexample")
        fill('a12', "example with \\"quotes\\"")
    """
    elem = get_elem_by_bid(page, bid, demo_mode != "off")
    add_demo_mode_effects(page, elem, bid, demo_mode=demo_mode, move_cursor=False)

    def do(force: bool):
        if demo_mode != "off":
            # spread typing over about 2 seconds, at least 10 ms per character
            delay = max(2000 / max(len(value), 1), 10)
            elem.clear(force=force, timeout=500)
            elem.type(value, delay=delay, timeout=0)  # no timeout
        else:
            elem.fill(value, force=force, timeout=500)

    call_fun(do, retry_with_force)
81
+
82
+
83
+ # https://playwright.dev/python/docs/api/class-locator#locator-check
84
+ def check(bid: str):
85
+ """
86
+ Ensure a checkbox or radio element is checked.
87
+
88
+ Examples:
89
+ check('55')
90
+ """
91
+ elem = get_elem_by_bid(page, bid, demo_mode != "off")
92
+ add_demo_mode_effects(page, elem, bid, demo_mode=demo_mode, move_cursor=True)
93
+
94
+ def do(force: bool):
95
+ elem.check(force=force, timeout=500)
96
+
97
+ call_fun(do, retry_with_force)
98
+
99
+
100
+ # https://playwright.dev/python/docs/api/class-locator#locator-uncheck
101
+ def uncheck(bid: str):
102
+ """
103
+ Ensure a checkbox or radio element is unchecked.
104
+
105
+ Examples:
106
+ uncheck('a5289')
107
+ """
108
+ elem = get_elem_by_bid(page, bid, demo_mode != "off")
109
+ add_demo_mode_effects(page, elem, bid, demo_mode=demo_mode, move_cursor=True)
110
+
111
+ def do(force: bool):
112
+ elem.uncheck(force=force, timeout=500)
113
+
114
+ call_fun(do, retry_with_force)
115
+
116
+
117
+ # https://playwright.dev/docs/input#select-options
118
+ def select_option(bid: str, options: str | list[str]):
119
+ """
120
+ Select one or multiple options in a <select> element. You can specify
121
+ option value or label to select. Multiple options can be selected.
122
+
123
+ Examples:
124
+ select_option('a48', "blue")
125
+ select_option('c48', ["red", "green", "blue"])
126
+ """
127
+ elem = get_elem_by_bid(page, bid, demo_mode != "off")
128
+ add_demo_mode_effects(page, elem, bid, demo_mode=demo_mode, move_cursor=False)
129
+
130
+ def do(force: bool):
131
+ elem.select_option(options, force=force, timeout=500)
132
+
133
+ call_fun(do, retry_with_force)
134
+
135
+
136
+ # https://playwright.dev/python/docs/api/class-locator#locator-click
137
+ def click(
138
+ bid: str,
139
+ button: Literal["left", "middle", "right"] = "left",
140
+ modifiers: list[Literal["Alt", "Control", "ControlOrMeta", "Meta", "Shift"]] = [],
141
+ ):
142
+ """
143
+ Click an element.
144
+
145
+ Examples:
146
+ click('a51')
147
+ click('b22', button="right")
148
+ click('48', button="middle", modifiers=["Shift"])
149
+ """
150
+ elem = get_elem_by_bid(page, bid, demo_mode != "off")
151
+ add_demo_mode_effects(page, elem, bid, demo_mode=demo_mode, move_cursor=True)
152
+
153
+ def do(force: bool):
154
+ elem.click(button=button, modifiers=modifiers, force=force, timeout=500)
155
+
156
+ call_fun(do, retry_with_force)
157
+
158
+
159
+ # https://playwright.dev/python/docs/api/class-locator#locator-dblclick
160
def dblclick(
    bid: str,
    button: Literal["left", "middle", "right"] = "left",
    modifiers: list[Literal["Alt", "Control", "ControlOrMeta", "Meta", "Shift"]] = [],
):
    """
    Double click an element.

    Examples:
        dblclick('12')
        dblclick('ca42', button="right")
        dblclick('178', button="middle", modifiers=["Shift"])
    """
    # NOTE: the docstring above is parsed at runtime to build the action
    # description and examples, so implementation notes live in comments.
    # The `modifiers` default is kept as-is (it is never mutated here) so the
    # signature reported by inspect.signature() stays unchanged.
    elem = get_elem_by_bid(page, bid, demo_mode != "off")
    add_demo_mode_effects(page, elem, bid, demo_mode=demo_mode, move_cursor=True)

    def do(force: bool):
        # Must be dblclick(): the previous elem.click() only issued a single click.
        elem.dblclick(button=button, modifiers=modifiers, force=force, timeout=500)

    call_fun(do, retry_with_force)
180
+
181
+
182
+ # https://playwright.dev/python/docs/api/class-locator#locator-hover
183
+ def hover(bid: str):
184
+ """
185
+ Hover over an element.
186
+
187
+ Examples:
188
+ hover('b8')
189
+ """
190
+ elem = get_elem_by_bid(page, bid, demo_mode != "off")
191
+ add_demo_mode_effects(
192
+ page, elem, bid, demo_mode=demo_mode, move_cursor=True, highlight_box=False
193
+ )
194
+
195
+ def do(force: bool):
196
+ elem.hover(force=force, timeout=500)
197
+
198
+ call_fun(do, retry_with_force)
199
+
200
+
201
+ # https://playwright.dev/python/docs/input#keys-and-shortcuts
202
+ def press(bid: str, key_comb: str):
203
+ """
204
+ Focus the matching element and press a combination of keys. It accepts
205
+ the logical key names that are emitted in the keyboardEvent.key property
206
+ of the keyboard events: Backquote, Minus, Equal, Backslash, Backspace,
207
+ Tab, Delete, Escape, ArrowDown, End, Enter, Home, Insert, PageDown, PageUp,
208
+ ArrowRight, ArrowUp, F1 - F12, Digit0 - Digit9, KeyA - KeyZ, etc. You can
209
+ alternatively specify a single character you'd like to produce such as "a"
210
+ or "#". Following modification shortcuts are also supported: Shift, Control,
211
+ Alt, Meta, ShiftLeft, ControlOrMeta. ControlOrMeta resolves to Control on
212
+ Windows and Linux and to Meta on macOS.
213
+
214
+ Examples:
215
+ press('88', 'Backspace')
216
+ press('a26', 'ControlOrMeta+a')
217
+ press('a61', 'Meta+Shift+t')
218
+ """
219
+ elem = get_elem_by_bid(page, bid, demo_mode != "off")
220
+ add_demo_mode_effects(page, elem, bid, demo_mode=demo_mode, move_cursor=False)
221
+ elem.press(key_comb, timeout=500)
222
+
223
+
224
+ # https://playwright.dev/python/docs/api/class-locator#locator-focus
225
+ def focus(bid: str):
226
+ """
227
+ Focus the matching element.
228
+
229
+ Examples:
230
+ focus('b455')
231
+ """
232
+ elem = get_elem_by_bid(page, bid, demo_mode != "off")
233
+ add_demo_mode_effects(page, elem, bid, demo_mode=demo_mode, move_cursor=False)
234
+ elem.focus(timeout=500)
235
+
236
+
237
+ # https://playwright.dev/python/docs/api/class-locator#locator-clear
238
+ def clear(bid: str):
239
+ """
240
+ Clear the input field.
241
+
242
+ Examples:
243
+ clear('996')
244
+ """
245
+ elem = get_elem_by_bid(page, bid, demo_mode != "off")
246
+ add_demo_mode_effects(page, elem, bid, demo_mode=demo_mode, move_cursor=False)
247
+ elem.clear(timeout=500)
248
+
249
+
250
+ # https://playwright.dev/python/docs/input#drag-and-drop
251
+ def drag_and_drop(from_bid: str, to_bid: str):
252
+ """
253
+ Perform a drag & drop. Hover the element that will be dragged. Press
254
+ left mouse button. Move mouse to the element that will receive the
255
+ drop. Release left mouse button.
256
+
257
+ Examples:
258
+ drag_and_drop('56', '498')
259
+ """
260
+ from_elem = get_elem_by_bid(page, from_bid, demo_mode != "off")
261
+ add_demo_mode_effects(page, from_elem, from_bid, demo_mode=demo_mode, move_cursor=True)
262
+ from_elem.hover(timeout=500)
263
+ page.mouse.down()
264
+
265
+ to_elem = get_elem_by_bid(page, to_bid, demo_mode != "off")
266
+ add_demo_mode_effects(page, to_elem, to_bid, demo_mode=demo_mode, move_cursor=True)
267
+ to_elem.hover(timeout=500)
268
+ page.mouse.up()
269
+
270
+
271
+ # https://playwright.dev/python/docs/api/class-mouse#mouse-wheel
272
+ def scroll(delta_x: float, delta_y: float):
273
+ """
274
+ Scroll horizontally and vertically. Amounts in pixels, positive for right or down scrolling, negative for left or up scrolling. Dispatches a wheel event.
275
+
276
+ Examples:
277
+ scroll(0, 200)
278
+ scroll(-50.2, -100.5)
279
+ """
280
+ page.mouse.wheel(delta_x, delta_y)
281
+
282
+
283
+ # https://playwright.dev/python/docs/api/class-mouse#mouse-move
284
+ def mouse_move(x: float, y: float):
285
+ """
286
+ Move the mouse to a location. Uses absolute client coordinates in pixels.
287
+ Dispatches a mousemove event.
288
+
289
+ Examples:
290
+ mouse_move(65.2, 158.5)
291
+ """
292
+ if demo_mode != "off":
293
+ smooth_move_visual_cursor_to(page, x, y)
294
+ page.mouse.move(x, y)
295
+
296
+
297
+ # https://playwright.dev/python/docs/api/class-mouse#mouse-up
298
+ def mouse_up(x: float, y: float, button: Literal["left", "middle", "right"] = "left"):
299
+ """
300
+ Move the mouse to a location then release a mouse button. Dispatches
301
+ mousemove and mouseup events.
302
+
303
+ Examples:
304
+ mouse_up(250, 120)
305
+ mouse_up(47, 252, 'right')
306
+ """
307
+ if demo_mode != "off":
308
+ smooth_move_visual_cursor_to(page, x, y)
309
+ highlight_by_box(page, {"x": x, "y": y, "width": 1, "height": 1})
310
+ page.mouse.move(x, y)
311
+ page.mouse.up(button=button)
312
+
313
+
314
+ # https://playwright.dev/python/docs/api/class-mouse#mouse-down
315
+ def mouse_down(x: float, y: float, button: Literal["left", "middle", "right"] = "left"):
316
+ """
317
+ Move the mouse to a location then press and hold a mouse button. Dispatches
318
+ mousemove and mousedown events.
319
+
320
+ Examples:
321
+ mouse_down(140.2, 580.1)
322
+ mouse_down(458, 254.5, 'middle')
323
+ """
324
+ if demo_mode != "off":
325
+ smooth_move_visual_cursor_to(page, x, y)
326
+ highlight_by_box(page, {"x": x, "y": y, "width": 1, "height": 1})
327
+ page.mouse.move(x, y)
328
+ page.mouse.down(button=button)
329
+
330
+
331
+ # https://playwright.dev/python/docs/api/class-mouse#mouse-click
332
+ def mouse_click(x: float, y: float, button: Literal["left", "middle", "right"] = "left"):
333
+ """
334
+ Move the mouse to a location and click a mouse button. Dispatches mousemove,
335
+ mousedown and mouseup events.
336
+
337
+ Examples:
338
+ mouse_click(887.2, 68)
339
+ mouse_click(56, 712.56, 'right')
340
+ """
341
+ if demo_mode != "off":
342
+ smooth_move_visual_cursor_to(page, x, y)
343
+ highlight_by_box(page, {"x": x, "y": y, "width": 1, "height": 1})
344
+ page.mouse.click(x, y, button=button)
345
+
346
+
347
+ # https://playwright.dev/python/docs/api/class-mouse#mouse-dblclick
348
+ def mouse_dblclick(x: float, y: float, button: Literal["left", "middle", "right"] = "left"):
349
+ """
350
+ Move the mouse to a location and double click a mouse button. Dispatches
351
+ mousemove, mousedown and mouseup events.
352
+
353
+ Examples:
354
+ mouse_dblclick(5, 236)
355
+ mouse_dblclick(87.5, 354, 'right')
356
+ """
357
+ if demo_mode != "off":
358
+ smooth_move_visual_cursor_to(page, x, y)
359
+ highlight_by_box(page, {"x": x, "y": y, "width": 1, "height": 1})
360
+ page.mouse.dblclick(x, y, button=button)
361
+
362
+
363
+ def mouse_drag_and_drop(from_x: float, from_y: float, to_x: float, to_y: float):
364
+ """
365
+ Drag and drop from a location to a location. Uses absolute client
366
+ coordinates in pixels. Dispatches mousemove, mousedown and mouseup
367
+ events.
368
+
369
+ Examples:
370
+ mouse_drag_and_drop(10.7, 325, 235.6, 24.54)
371
+ """
372
+ if demo_mode != "off":
373
+ x, y = from_x, from_y
374
+ smooth_move_visual_cursor_to(page, x, y)
375
+ highlight_by_box(page, {"x": x, "y": y, "width": 1, "height": 1})
376
+ page.mouse.move(from_x, from_y)
377
+ page.mouse.down()
378
+ if demo_mode != "off":
379
+ x, y = to_x, to_y
380
+ smooth_move_visual_cursor_to(page, x, y)
381
+ highlight_by_box(page, {"x": x, "y": y, "width": 1, "height": 1})
382
+ page.mouse.move(to_x, to_y)
383
+ page.mouse.up()
384
+
385
+
386
+ # https://playwright.dev/python/docs/api/class-keyboard#keyboard-press
387
def keyboard_press(key: str):
    """
    Press a combination of keys. Accepts the logical key names that are
    emitted in the keyboardEvent.key property of the keyboard events:
    Backquote, Minus, Equal, Backslash, Backspace, Tab, Delete, Escape,
    ArrowDown, End, Enter, Home, Insert, PageDown, PageUp, ArrowRight,
    ArrowUp, F1 - F12, Digit0 - Digit9, KeyA - KeyZ, etc. You can
    alternatively specify a single character you'd like to produce such
    as "a" or "#". Following modification shortcuts are also supported:
    Shift, Control, Alt, Meta, ShiftLeft, ControlOrMeta. ControlOrMeta
    resolves to Control on Windows and Linux and to Meta on macOS.

    Examples:
        keyboard_press('Backspace')
        keyboard_press('ControlOrMeta+a')
        keyboard_press('Meta+Shift+t')
        keyboard_press('PageDown')
    """
    # NOTE: the examples above are parsed at runtime and presented as sample
    # actions, so each one must be a call to this action (the previous last
    # example was a raw `page.keyboard.press(...)` Playwright call).
    # `page` is a module-level global holding the active page.
    page.keyboard.press(key)
406
+
407
+
408
+ # https://playwright.dev/python/docs/api/class-keyboard#keyboard-up
409
+ def keyboard_up(key: str):
410
+ """
411
+ Release a keyboard key. Dispatches a keyup event. Accepts the logical
412
+ key names that are emitted in the keyboardEvent.key property of the
413
+ keyboard events: Backquote, Minus, Equal, Backslash, Backspace, Tab,
414
+ Delete, Escape, ArrowDown, End, Enter, Home, Insert, PageDown, PageUp,
415
+ ArrowRight, ArrowUp, F1 - F12, Digit0 - Digit9, KeyA - KeyZ, etc.
416
+ You can alternatively specify a single character you'd like to produce
417
+ such as "a" or "#".
418
+
419
+ Examples:
420
+ keyboard_up('Shift')
421
+ keyboard_up('c')
422
+ """
423
+ page.keyboard.up(key)
424
+
425
+
426
+ # https://playwright.dev/python/docs/api/class-keyboard#keyboard-down
427
def keyboard_down(key: str):
    """
    Press and holds a keyboard key. Dispatches a keydown event. Accepts the
    logical key names that are emitted in the keyboardEvent.key property of
    the keyboard events: Backquote, Minus, Equal, Backslash, Backspace, Tab,
    Delete, Escape, ArrowDown, End, Enter, Home, Insert, PageDown, PageUp,
    ArrowRight, ArrowUp, F1 - F12, Digit0 - Digit9, KeyA - KeyZ, etc. You can
    alternatively specify a single character such as "a" or "#".

    Examples:
        keyboard_down('Shift')
        keyboard_down('c')
    """
    # NOTE: the examples above are parsed at runtime and presented as sample
    # actions; they previously showed keyboard_up(...) calls (copy-paste slip),
    # advertising the opposite action.
    # `page` is a module-level global holding the active page.
    page.keyboard.down(key)
441
+
442
+
443
+ # https://playwright.dev/python/docs/api/class-keyboard#keyboard-type
444
def keyboard_type(text: str):
    """
    Types a string of text through the keyboard. Sends a keydown, keypress/input,
    and keyup event for each character in the text. Modifier keys DO NOT affect
    keyboard_type. Holding down Shift will not type the text in upper case.

    Examples:
        keyboard_type('Hello world!')
    """
    # NOTE: the docstring above is parsed at runtime to build the action
    # description and examples, so implementation notes live in comments.
    # `page` and `demo_mode` are module-level globals.
    if demo_mode != "off":
        # Demo mode slows typing down: about 2000ms in total, but never less
        # than 10ms per key. max(len(text), 1) guards against a
        # ZeroDivisionError when an empty string is typed.
        delay = max(2000 / max(len(text), 1), 10)
    else:
        delay = None
    page.keyboard.type(text, delay=delay)
458
+
459
+
460
+ # https://playwright.dev/python/docs/api/class-keyboard#keyboard-insert-text
461
+ def keyboard_insert_text(text: str):
462
+ """
463
+ Insert a string of text in the currently focused element. Dispatches only input
464
+ event, does not emit the keydown, keyup or keypress events. Modifier keys DO NOT
465
+ affect keyboard_insert_text. Holding down Shift will not type the text in upper
466
+ case.
467
+
468
+ Examples:
469
+ keyboard_insert_text('Hello world!')
470
+ """
471
+ page.keyboard.insert_text(text)
472
+
473
+
474
+ # https://playwright.dev/python/docs/api/class-page#page-goto
475
+ def goto(url: str):
476
+ """
477
+ Navigate to a url.
478
+
479
+ Examples:
480
+ goto('http://www.example.com')
481
+ """
482
+ page.goto(url)
483
+
484
+
485
+ # https://playwright.dev/python/docs/api/class-page#page-go-back
486
+ def go_back():
487
+ """
488
+ Navigate to the previous page in history.
489
+
490
+ Examples:
491
+ go_back()
492
+ """
493
+ page.go_back()
494
+
495
+
496
+ # https://playwright.dev/python/docs/api/class-page#page-go-forward
497
+ def go_forward():
498
+ """
499
+ Navigate to the next page in history.
500
+
501
+ Examples:
502
+ go_forward()
503
+ """
504
+ page.go_forward()
505
+
506
+
507
+ # https://playwright.dev/python/docs/api/class-browsercontext#browser-context-new-page
508
+ def new_tab():
509
+ """
510
+ Open a new tab. It will become the active one.
511
+
512
+ Examples:
513
+ new_tab()
514
+ """
515
+ global page
516
+ # set the new page as the active page
517
+ page = page.context.new_page()
518
+ # trigger the callback that sets this page as active in browsergym
519
+ page.evaluate(
520
+ """\
521
+ const event = new Event('pageshow', {
522
+ bubbles: true, // Whether the event bubbles up through the DOM or not
523
+ cancelable: false // Whether the event can be canceled
524
+ });
525
+ window.dispatchEvent(event);
526
+ """
527
+ )
528
+
529
+
530
+ # https://playwright.dev/python/docs/api/class-page#page-close
531
+ def tab_close():
532
+ """
533
+ Close the current tab.
534
+
535
+ Examples:
536
+ tab_close()
537
+ """
538
+ global page
539
+ context = page.context
540
+ page.close()
541
+ # set most recent page as active page, or open a new page if needed
542
+ if context.pages:
543
+ # TODO: do something more elaborate? (active page history)
544
+ page = context.pages[-1]
545
+ else:
546
+ page = context.new_page()
547
+ # trigger the callback that sets this page as active in browsergym
548
+ page.evaluate(
549
+ """\
550
+ const event = new Event('pageshow', {
551
+ bubbles: true, // Whether the event bubbles up through the DOM or not
552
+ cancelable: false // Whether the event can be canceled
553
+ });
554
+ window.dispatchEvent(event);
555
+ """
556
+ )
557
+
558
+
559
+ # https://playwright.dev/python/docs/api/class-page#page-bring-to-front
560
+ def tab_focus(index: int):
561
+ """
562
+ Bring tab to front (activate tab).
563
+
564
+ Examples:
565
+ tab_focus(2)
566
+ """
567
+ global page # set the focused page as the active page
568
+ page = page.context.pages[index]
569
+ page.bring_to_front()
570
+ # trigger the callback that sets this page as active in browsergym
571
+ page.evaluate(
572
+ """\
573
+ const event = new Event('pageshow', {
574
+ bubbles: true, // Whether the event bubbles up through the DOM or not
575
+ cancelable: false // Whether the event can be canceled
576
+ });
577
+ window.dispatchEvent(event);
578
+ """
579
+ )
580
+
581
+
582
+ # https://playwright.dev/python/docs/input#upload-files
583
+ def upload_file(bid: str, file: str | list[str]):
584
+ """
585
+ Click an element and wait for a "filechooser" event, then select one
586
+ or multiple input files for upload. Relative file paths are resolved
587
+ relative to the current working directory. An empty list clears the
588
+ selected files.
589
+
590
+ Examples:
591
+ upload_file("572", "my_receipt.pdf")
592
+ upload_file("63", ["/home/bob/Documents/image.jpg", "/home/bob/Documents/file.zip"])
593
+ """
594
+ elem = get_elem_by_bid(page, bid, demo_mode != "off")
595
+ add_demo_mode_effects(page, elem, bid, demo_mode=demo_mode, move_cursor=True)
596
+
597
+ with page.expect_file_chooser() as fc_info:
598
+ elem.click(timeout=500)
599
+
600
+ file_chooser = fc_info.value
601
+ file_chooser.set_files(file)
602
+
603
+
604
+ # https://playwright.dev/python/docs/input#upload-files
605
+ def mouse_upload_file(x: float, y: float, file: str | list[str]):
606
+ """
607
+ Click a location and wait for a "filechooser" event, then select one
608
+ or multiple input files for upload. Relative file paths are resolved
609
+ relative to the current working directory. An empty list clears the
610
+ selected files.
611
+
612
+ Examples:
613
+ mouse_upload_file(132.1, 547, "my_receipt.pdf")
614
+ mouse_upload_file(328, 812, ["/home/bob/Documents/image.jpg", "/home/bob/Documents/file.zip"])
615
+ """
616
+ if demo_mode != "off":
617
+ smooth_move_visual_cursor_to(page, x, y)
618
+ highlight_by_box(page, {"x": x, "y": y, "width": 1, "height": 1})
619
+
620
+ with page.expect_file_chooser() as fc_info:
621
+ page.mouse.click(x, y)
622
+
623
+ file_chooser = fc_info.value
624
+ file_chooser.set_files(file)
BrowserGym/browsergym/core/src/browsergym/core/action/highlevel.py ADDED
@@ -0,0 +1,522 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import inspect
2
+ import random
3
+ import typing
4
+ from dataclasses import dataclass
5
+
6
+ from . import utils
7
+ from .base import AbstractActionSet
8
+ from .functions import ( # check,; uncheck,
9
+ clear,
10
+ click,
11
+ dblclick,
12
+ drag_and_drop,
13
+ fill,
14
+ focus,
15
+ go_back,
16
+ go_forward,
17
+ goto,
18
+ hover,
19
+ keyboard_down,
20
+ keyboard_insert_text,
21
+ keyboard_press,
22
+ keyboard_type,
23
+ keyboard_up,
24
+ mouse_click,
25
+ mouse_dblclick,
26
+ mouse_down,
27
+ mouse_drag_and_drop,
28
+ mouse_move,
29
+ mouse_up,
30
+ mouse_upload_file,
31
+ new_tab,
32
+ noop,
33
+ press,
34
+ report_infeasible,
35
+ scroll,
36
+ select_option,
37
+ send_msg_to_user,
38
+ tab_close,
39
+ tab_focus,
40
+ upload_file,
41
+ )
42
+ from .parsers import action_docstring_parser, highlevel_action_parser
43
+
44
+ ACTION_SUBSETS = {
45
+ "chat": [send_msg_to_user],
46
+ "infeas": [report_infeasible],
47
+ "bid": [
48
+ scroll,
49
+ fill,
50
+ # These are not really needed and might pollute the action space, doing more harm than good
51
+ # check,
52
+ # uncheck,
53
+ select_option,
54
+ click,
55
+ dblclick,
56
+ hover,
57
+ press,
58
+ focus,
59
+ clear,
60
+ drag_and_drop,
61
+ upload_file,
62
+ ],
63
+ "coord": [
64
+ scroll,
65
+ mouse_move,
66
+ mouse_up,
67
+ mouse_down,
68
+ mouse_click,
69
+ mouse_dblclick,
70
+ mouse_drag_and_drop,
71
+ mouse_upload_file,
72
+ keyboard_down,
73
+ keyboard_up,
74
+ keyboard_press,
75
+ keyboard_type,
76
+ keyboard_insert_text,
77
+ ],
78
+ "nav": [go_back, go_forward, goto],
79
+ "tab": [
80
+ tab_close,
81
+ tab_focus,
82
+ new_tab,
83
+ ],
84
+ # adapted from MiniWoB repo
85
+ # https://github.com/Farama-Foundation/miniwob-plusplus/blob/1bab0dffe34e92cc1049fe9443542029bf7e44a9/miniwob/action.py#L122
86
+ "miniwob_all": [
87
+ mouse_move, # MOVE_COORDS
88
+ mouse_click, # CLICK_COORDS
89
+ mouse_dblclick, # DBLCLICK_COORDS
90
+ mouse_down, # MOUSEDOWN_COORDS
91
+ mouse_up, # MOUSEUP_COORDS
92
+ scroll, # SCROLL_UP_COORDS, SCROLL_DOWN_COORDS
93
+ click, # CLICK_ELEMENT
94
+ keyboard_press, # PRESS_KEY
95
+ keyboard_type, # TYPE_TEXT (and substitute for TYPE_FIELD)
96
+ fill, # FOCUS_ELEMENT_AND_TYPE_TEXT (and substitute for FOCUS_ELEMENT_AND_TYPE_FIELD)
97
+ ],
98
+ # adapted from MiniWoB repo
99
+ # https://github.com/Farama-Foundation/miniwob-plusplus/blob/1bab0dffe34e92cc1049fe9443542029bf7e44a9/miniwob/action.py#L142
100
+ "miniwob_shi17": [
101
+ mouse_click, # CLICK_COORDS
102
+ mouse_dblclick, # DBLCLICK_COORDS
103
+ mouse_down, # MOUSEDOWN_COORDS
104
+ mouse_up, # MOUSEUP_COORDS
105
+ scroll, # SCROLL_UP_COORDS, SCROLL_DOWN_COORDS
106
+ keyboard_press, # PRESS_KEY
107
+ ],
108
+ # adapted from MiniWoB repo
109
+ # https://github.com/Farama-Foundation/miniwob-plusplus/blob/1bab0dffe34e92cc1049fe9443542029bf7e44a9/miniwob/action.py#L160
110
+ "miniwob_liu18": [
111
+ click, # CLICK_ELEMENT
112
+ fill, # substitute for FOCUS_ELEMENT_AND_TYPE_FIELD
113
+ ],
114
+ # adapted from MiniWoB repo
115
+ # https://github.com/Farama-Foundation/miniwob-plusplus/blob/1bab0dffe34e92cc1049fe9443542029bf7e44a9/miniwob/action.py#L173
116
+ "miniwob_humphreys22": [
117
+ mouse_move, # MOVE_COORDS
118
+ mouse_click, # CLICK_COORDS
119
+ mouse_dblclick, # DBLCLICK_COORDS
120
+ mouse_down, # MOUSEDOWN_COORDS
121
+ mouse_up, # MOUSEUP_COORDS
122
+ scroll, # SCROLL_UP_COORDS, SCROLL_DOWN_COORDS
123
+ keyboard_press, # PRESS_KEY
124
+ keyboard_type, # substitute for TYPE_FIELD
125
+ ],
126
+ # from the webarena paper
127
+ # https://arxiv.org/abs/2307.13854
128
+ # from the webarena source code
129
+ # https://github.com/web-arena-x/webarena/blob/e31c190c9b43f63e5724322b847e00249300df40/browser_env/actions.py#L240
130
+ # from the webarena default prompt
131
+ # https://github.com/web-arena-x/webarena/blob/e31c190c9b43f63e5724322b847e00249300df40/agent/prompts/raw/p_cot_id_actree_2s.py#L13
132
+ "webarena": [
133
+ # # code | paper | prompt
134
+ scroll, # SCROLL | scroll(dir) | scroll [down|up]
135
+ keyboard_press, # KEY_PRESS | press(key_comb) | press [key_comb]
136
+ # MOUSE_CLICK | |
137
+ # KEYBOARD_TYPE | |
138
+ # MOUSE_HOVER | |
139
+ click, # CLICK | click(elem) | click [id]
140
+ fill, # TYPE | type(elem, text) | type [id] [content]
141
+ hover, # HOVER | hover(elem) | hover [id]
142
+ tab_focus, # PAGE_FOCUS | tab_focus(index) | tab_focus [tab_index]
143
+ new_tab, # NEW_TAB | new_tab() | new_tab
144
+ go_back, # GO_BACK | go_back() | go_back
145
+ go_forward, # GO_FORWARD | go_forward() | go_forward
146
+ goto, # GOTO_URL | goto(url) | goto [url]
147
+ tab_close, # PAGE_CLOSE | tab_close() | close_tab
148
+ # CHECK | |
149
+ select_option, # SELECT_OPTION | |
150
+ send_msg_to_user, # STOP | stop(answer) | stop [answer]
151
+ report_infeasible, ## explicit unachievable action, equivalent STOP "N/A"
152
+ ],
153
+ # from the visualwebarena paper
154
+ # https://arxiv.org/abs/2401.13649
155
+ # from the visualwebarena source code
156
+ # https://github.com/web-arena-x/visualwebarena/blob/15890922c97a8694e366fde2d7de8dbd1ff63fb5/browser_env/actions.py#L311-L343
157
+ # from the visualwebarena default prompt
158
+ # https://github.com/web-arena-x/visualwebarena/blob/15890922c97a8694e366fde2d7de8dbd1ff63fb5/agent/prompts/jsons/p_cot_id_actree_3s.json#L2
159
+ "visualwebarena": [
160
+ # # code | paper | prompt
161
+ scroll, # SCROLL | scroll(dir) | scroll [down|up]
162
+ keyboard_press, # KEY_PRESS | press(key_comb) | press [key_comb]
163
+ # MOUSE_CLICK | |
164
+ # KEYBOARD_TYPE | |
165
+ # MOUSE_HOVER | |
166
+ click, # CLICK | click(elem) | click [id]
167
+ fill, # TYPE | type(elem, text) | type [id] [content]
168
+ hover, # HOVER | hover(elem) | hover [id]
169
+ tab_focus, # PAGE_FOCUS | tab_focus(index) | tab_focus [tab_index]
170
+ new_tab, # NEW_TAB | new_tab() | new_tab
171
+ go_back, # GO_BACK | go_back() | go_back
172
+ go_forward, # GO_FORWARD | go_forward() | go_forward
173
+ goto, # GOTO_URL | goto(url) | goto [url]
174
+ tab_close, # PAGE_CLOSE | tab_close() | close_tab
175
+ # CHECK | |
176
+ select_option, # SELECT_OPTION | |
177
+ send_msg_to_user, # STOP | stop(answer) | stop [answer]
178
+ # CLEAR | |
179
+ upload_file, # UPLOAD | |
180
+ report_infeasible, ## explicit unachievable action, equivalent STOP "N/A"
181
+ ],
182
+ # from workarena paper
183
+ # https://arxiv.org/abs/2403.07718
184
+ "workarena": [
185
+ scroll,
186
+ fill,
187
+ select_option,
188
+ click,
189
+ dblclick,
190
+ hover,
191
+ press,
192
+ focus,
193
+ clear,
194
+ drag_and_drop,
195
+ send_msg_to_user,
196
+ ],
197
+ # from workarena++ paper
198
+ # https://arxiv.org/abs/2407.05291
199
+ "workarena++": [
200
+ scroll,
201
+ fill,
202
+ select_option,
203
+ click,
204
+ dblclick,
205
+ hover,
206
+ press,
207
+ focus,
208
+ clear,
209
+ drag_and_drop,
210
+ tab_focus,
211
+ new_tab,
212
+ tab_close,
213
+ go_back,
214
+ go_forward,
215
+ goto,
216
+ send_msg_to_user,
217
+ report_infeasible,
218
+ ],
219
+ # from weblinx_browsergym
220
+ # https://github.com/McGill-NLP/agentlab-weblinx-mvp/blob/a91b6d19870c5187d252e70a2e2013511cc6f1d2/weblinx_browsergym/__init__.py#L274-L286
221
+ "weblinx": [
222
+ send_msg_to_user, # say(speaker="assistant", utterance=[str]) -> send_msg_to_user(text=[str])
223
+ click, # click(uid=[element id]) -> click(bid=[element id])
224
+ hover, # hover(uid=[element id]) -> hover(bid=[element id])
225
+ fill, # textinput(uid=[element id], value=[str]) -> fill(bid=[element id], value=[str])
226
+ # change(uid=[element], value=[str]) -> ❌
227
+ goto, # load(url=[link]) -> goto(url=[link])
228
+ # submit(uid=[element]) -> click(bid=[element id])
229
+ scroll, # scroll(x=[int x],y=[int y]) -> scroll(delta_x=[int x], delta_y=[int y])
230
+ # copy(uid=[element],text=[str]) -> ❌
231
+ # paste(uid=[element],text=[str]) -> ❌
232
+ new_tab, # tabcreate() -> new_tab()
233
+ tab_close, # tabremove(target=[tabId]) -> tab_close()
234
+ tab_focus, # tabswitch(origin=[origin tabId],target=[target tabId]) -> tab_focus(index=[target tabid])
235
+ ],
236
+ # from assistantbench paper
237
+ # https://arxiv.org/abs/2407.15711
238
+ "assistantbench": [
239
+ scroll, # SCROLL
240
+ fill, # TYPE
241
+ select_option, # SELECT
242
+ click, # CLICK
243
+ press, # PRESS ENTER
244
+ go_back, # GOBACK
245
+ goto, # GOTO, SEARCH
246
+ send_msg_to_user, # TERMINATE
247
+ ],
248
+ }
249
+
250
+
251
+ @dataclass
252
+ class HighLevelAction:
253
+ # entrypoint: callable
254
+ signature: str
255
+ description: str
256
+ examples: list[str]
257
+
258
+
259
+ class HighLevelActionSet(AbstractActionSet):
260
+
261
+ # static class variables
262
+ ActionSubset = typing.Literal[
263
+ "chat",
264
+ "infeas",
265
+ "bid",
266
+ "coord",
267
+ "nav",
268
+ "tab",
269
+ "miniwob_all",
270
+ "miniwob_shi17",
271
+ "miniwob_liu18",
272
+ "miniwob_humphreys22",
273
+ "webarena",
274
+ "visualwebarena",
275
+ "workarena",
276
+ "workarena++",
277
+ "weblinx",
278
+ "assistantbench",
279
+ "custom",
280
+ ]
281
+ DemoMode = typing.Literal["off", "default", "all_blue", "only_visible_elements"]
282
+
283
+ def __init__(
284
+ self,
285
+ subsets: typing.Optional[ActionSubset | list[ActionSubset]] = [
286
+ "chat",
287
+ "infeas",
288
+ "bid",
289
+ "nav",
290
+ "tab",
291
+ ],
292
+ custom_actions: typing.Optional[list[callable]] = None,
293
+ multiaction: bool = True,
294
+ demo_mode: typing.Optional[DemoMode] = None,
295
+ strict: bool = False,
296
+ retry_with_force: bool = False,
297
+ ):
298
+ super().__init__(strict)
299
+ self.multiaction = multiaction
300
+ self.demo_mode = demo_mode
301
+ self.retry_with_force = retry_with_force
302
+
303
+ if not subsets:
304
+ raise ValueError(f"'action_subsets' is empty.")
305
+
306
+ if isinstance(subsets, str):
307
+ subsets = [subsets]
308
+
309
+ allowed_actions = [noop] # the noop action is always allowed
310
+
311
+ # add actions from specified action sets
312
+ if subsets:
313
+ for subset in subsets:
314
+ if subset in ACTION_SUBSETS:
315
+ allowed_actions.extend(ACTION_SUBSETS[subset])
316
+ elif subset == "custom":
317
+ if not custom_actions:
318
+ raise ValueError(
319
+ "'custom' is in 'action_subsets' but 'custom_actions' is empty."
320
+ )
321
+ allowed_actions.extend(custom_actions)
322
+ else:
323
+ raise ValueError(f"Unknown high-level action subspace: {subset}")
324
+
325
+ # like set() but preserves order
326
+ # https://stackoverflow.com/questions/1653970/does-python-have-an-ordered-set
327
+ allowed_actions = list(dict.fromkeys(allowed_actions).keys())
328
+
329
+ # parse the actions and build the action space
330
+ self.action_set: dict[str, HighLevelAction] = {}
331
+ self.python_includes = ""
332
+
333
+ # include playwright imports
334
+ self.python_includes += f"""\
335
+ import playwright.sync_api
336
+ from typing import Literal
337
+
338
+
339
+ """
340
+ # set demo_mode and retry_with_force flags
341
+ self.python_includes += f"""\
342
+ demo_mode={repr(demo_mode)}
343
+ retry_with_force={repr(retry_with_force)}
344
+
345
+ if demo_mode is None:
346
+ demo_mode = "default" if DEMO_MODE else "off"
347
+
348
+ """
349
+
350
+ # include utility functions
351
+ for _, func in inspect.getmembers(utils, inspect.isfunction):
352
+ self.python_includes += f"""\
353
+ {inspect.getsource(func)}
354
+
355
+
356
+ """
357
+
358
+ # parse and include action functions
359
+ for func in allowed_actions:
360
+
361
+ # include action function definition in the code
362
+ self.python_includes += f"""\
363
+ {inspect.getsource(func)}
364
+
365
+
366
+ """
367
+
368
+ # extract action signature
369
+ signature = f"{func.__name__}{inspect.signature(func)}"
370
+
371
+ # parse docstring
372
+ description, examples = action_docstring_parser.parse_string(func.__doc__)
373
+
374
+ # reconstruct action description
375
+ description = " ".join(description)
376
+
377
+ # reconstruct action examples
378
+ examples = [
379
+ function_name + "(" + ", ".join([repr(arg) for arg in function_args]) + ")"
380
+ for function_name, function_args in examples
381
+ ]
382
+
383
+ if func.__name__ in self.action_set:
384
+ raise ValueError(f"Duplicated action '{func.__name__}'")
385
+
386
+ self.action_set[func.__name__] = HighLevelAction(
387
+ # entrypoint=func,
388
+ signature=signature,
389
+ description=description,
390
+ examples=examples,
391
+ )
392
+
393
def example_action(self, abstract: bool, max_examples: int = 3) -> str:
    """
    Returns an example action as a string.

    Args:
        abstract: if True, return a generic sentence describing the expected
            answer format instead of concrete function calls.
        max_examples: maximum number of example calls returned when several
            actions can be provided at once (multiaction). Ignored otherwise.

    Returns:
        The abstract sentence, or concrete example call(s): a single call in
        single-action mode, up to `max_examples` calls separated by new lines
        in multiaction mode. An empty string is returned when no action of the
        action set provides an example.
    """
    if abstract:
        if self.multiaction:
            return "One or several actions, separated by new lines."
        return "One single action to be executed. You can only use one action at a time."

    picked_examples = []

    # use fill and click examples if action is present
    for action_name in ["fill", "click", "mouse_click", "keyboard_type"]:
        if action_name in self.action_set:
            picked_examples.extend(self.action_set[action_name].examples)

    # last resort, use all action examples
    if not picked_examples:
        for action in self.action_set.values():
            picked_examples.extend(action.examples)

    # nothing to show: describe() treats an empty string as "no example",
    # so return one instead of failing on picked_examples[0]
    if not picked_examples:
        return ""

    # shuffle examples (fixed seed, so the result is the same on every call)
    rng = random.Random(1)
    rng.shuffle(picked_examples)

    if self.multiaction:
        return "\n".join(picked_examples[:max_examples])
    return picked_examples[0]
425
+
426
def describe(self, with_long_description: bool = True, with_examples: bool = True):
    """
    Returns a textual description of this action space.

    Args:
        with_long_description: if True, each action signature is followed by
            the description of the action.
        with_examples: if True, each action signature is followed by the
            examples of the action (when it has any).

    Returns:
        The description: the number of available actions, one entry per
        action, a sentence about single / multiple actions, and an example
        action when `example_action()` provides one.
    """
    description = f"""
{len(self.action_set)} different types of actions are available.

"""
    # one entry per action: signature, then optional description and examples
    for _, action in self.action_set.items():
        description += f"""\
{action.signature}
"""

        if with_long_description:
            description += f"""\
Description: {action.description}
"""
        if with_examples and action.examples:
            description += f"""\
Examples:
"""
            for example in action.examples:
                description += f"""\
{example}

"""

    if self.multiaction:
        description += f"""\
Multiple actions can be provided at once, but will be executed sequentially without any feedback from the page.
More than 2-3 actions usually leads to failure or unexpected behavior."""
    else:
        description += f"""\
Only a single action can be provided at once."""

    # finish with a concrete example, or with a bare line break when there is none
    example_action = self.example_action(abstract=False)
    if example_action:
        description += f""" Example:
{example_action}
"""
    else:
        description += f"""\

"""

    return description
472
+
473
def to_python_code(self, action):
    """
    Converts the given high-level action string to browsergym-compatible python code.

    Args:
        action: the high-level action to parse.

    Returns:
        Executable python code that performs the action in a browsergym environment:
        the definitions stored in `self.python_includes`, followed by one function
        call per line.

    Raises:
        ValueError: if no function call is found in the action, or if several
            are found while multi-actions are not allowed.
        NameError: if a function call is not part of the action set.
    """
    # do the actual parsing and convert each high-level action to
    # the corresponding python function call
    if self.strict:
        parsed = highlevel_action_parser.parse_string(action, parse_all=True)
        function_calls = parsed.as_list()
    else:
        # allow for multiple matches, skip anything in-between
        matches = highlevel_action_parser.search_string(action)
        function_calls = sum(matches.as_list(), [])  # unpack multiple matches

    if not function_calls:
        raise ValueError("Received an empty action.")
    if len(function_calls) > 1 and not self.multiaction:
        raise ValueError("Received a multi-action, only single-actions are allowed.")

    # function definitions first, then the function calls
    code_chunks = [self.python_includes]
    for function_name, function_args in function_calls:
        if function_name not in self.action_set:
            raise NameError(f"Invalid action type '{function_name}'.")
        rendered_args = ", ".join([repr(arg) for arg in function_args])
        code_chunks.append(f"{function_name}({rendered_args})\n")

    # return the constructed python code
    return "".join(code_chunks)
516
+
517
+
518
# consistency checks: "custom" must not be a predefined subset, and the values
# declared in HighLevelActionSet.ActionSubset must be exactly the predefined
# subsets plus "custom"
assert "custom" not in ACTION_SUBSETS
assert set(typing.get_args(HighLevelActionSet.ActionSubset)) == {*ACTION_SUBSETS.keys(), "custom"}
BrowserGym/browsergym/core/src/browsergym/core/action/parsers.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+ import pyparsing as pp
3
+
4
+ from dataclasses import dataclass
5
+ from typing import Any
6
+
7
+
8
@dataclass
class NamedArgument:
    """A `name=value` argument of a function call, printed back as `name=<repr of value>`."""

    name: str  # left-hand side of the "="
    value: Any  # right-hand side of the "="

    def __repr__(self):
        return "{}={!r}".format(self.name, self.value)
15
+
16
+
17
def _build_highlevel_action_parser() -> pp.ParserElement:
    """
    Builds the pyparsing grammar used to parse high-level actions.

    Returns:
        An action parser that accepts Python-like function calls with string, number, list, tuple or dict
        literals (and True / False / None) as arguments, optionally followed by named arguments.
        Example:
            func("a", 42, None, True, [2, 4, "s"], {"a_key": "a_value"}, )
        The parser is loose and accepts multi-line or single-line combinations of calls.
        Example:
            func() func()
            \tfunc()
        Python comments are ignored.
        Example:
            # this is a comment
            func() # this function call will be parsed
            # func() # this one will not
        The parser will return a list of (function_name, function_args) tuples, one for each function call in the input.
        Parsing errors are reported through exceptions.

    """

    # a keyword that is replaced with the given python value once parsed
    def make_keyword(kwd_str, kwd_value):
        return pp.Keyword(kwd_str).set_parse_action(pp.replace_with(kwd_value))

    TRUE = make_keyword("True", True)
    FALSE = make_keyword("False", False)
    NONE = make_keyword("None", None)

    # punctuation is matched but removed from the parsing results
    LBRACK, RBRACK, LBRACE, RBRACE, LPAREN, RPAREN, COLON = map(pp.Suppress, "[]{}():")

    # turn a quoted string token into the corresponding python string
    def literal_eval(toks):
        return ast.literal_eval(toks[0])

    string = pp.python_quoted_string().set_parse_action(literal_eval)
    number = pp.pyparsing_common.number()
    # NOTE: `dict` and `list` shadow the builtins inside this function
    dict = pp.Forward().set_name("dict")  # will be defined later
    list = pp.Forward().set_name("list")  # will be defined later
    _tuple = pp.Forward().set_name("tuple")  # will be defined later
    element = (string | number | dict | list | _tuple | TRUE | FALSE | NONE).set_name("element")

    # lists and tuples: comma-separated elements, trailing comma allowed
    list_items = pp.DelimitedList(element, allow_trailing_delim=True).set_name(None)
    list << pp.Group(LBRACK + pp.Optional(list_items) + RBRACK, aslist=True)
    _tuple << pp.Group(LPAREN + pp.Optional(list_items) + RPAREN, aslist=True).set_parse_action(
        lambda tokens: tuple(tokens[0])
    )

    # dicts: only quoted strings are accepted as keys
    dict_item = pp.Group(string + COLON + element, aslist=True).set_name("dict item")
    dict_items = pp.DelimitedList(dict_item, allow_trailing_delim=True).set_name(None)
    dict << pp.Dict(LBRACE + pp.Optional(dict_items) + RBRACE, asdict=True)

    # function calls: positional arguments first, then named arguments
    arg = element
    list_args = pp.DelimitedList(arg, allow_trailing_delim=True).set_name(None)
    named_arg = (pp.pyparsing_common.identifier() + pp.Literal("=") + element).set_parse_action(
        lambda tokens: NamedArgument(name=tokens[0], value=tokens[2])
    )
    list_named_args = pp.DelimitedList(named_arg, allow_trailing_delim=True).set_name(None)
    function_call = pp.pyparsing_common.identifier() + pp.Group(
        LPAREN + pp.Optional(list_args) + pp.Optional(list_named_args) + RPAREN, aslist=True
    )

    # several calls in a row, with no delimiter in-between; comments are skipped
    multiple_function_calls = pp.DelimitedList(pp.Group(function_call), delim="")
    multiple_function_calls.ignore(pp.python_style_comment())

    parser = multiple_function_calls

    return parser
82
+
83
+
84
# this one will be used to extract python-like function calls
highlevel_action_parser: pp.ParserElement = _build_highlevel_action_parser()

# this one will be used to process the docstring in high-level actions, in order to describe the action space
# expected docstring layout: free-text description, then "Examples:", then example function calls
# results: a first group with the words of the description, a second group with the parsed example calls
action_docstring_parser: pp.ParserElement = (
    pp.Group(pp.OneOrMore(pp.Word(pp.printables), stop_on=pp.Literal("Examples:")))
    + pp.Literal("Examples:").suppress()
    + pp.Group(highlevel_action_parser)
)
BrowserGym/browsergym/core/src/browsergym/core/action/python.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+ from .base import AbstractActionSet
4
+
5
+
6
+ class PythonActionSet(AbstractActionSet):
7
def describe(self, with_long_description: bool = True, with_examples: bool = True):
    """
    Returns a textual description of this action space.

    Args:
        with_long_description: if True, add the explanations about the browser
            context, the `bid` attribute and how to locate elements in iframes.
        with_examples: if True, add a list of examples of valid actions.

    Returns:
        The description, as a string.
    """
    # short description, always included
    description = f"""
Each action consists of executable Python code (python>=3.10) that uses the Playwright library (playwright==1.32)
to interact with the current webpage and the browser context. The currently active webpage is accessible via the
global variable `page`. A function `send_message_to_user(text)` is also accessible and can be used to send a
message to the user, as well as a function `report_infeasible_instructions(reason)` to notify the user when their
instructions are infeasible."""
    if with_long_description:
        description += f"""
The browser context is in `page.context`, and all open webpages (tabs and popups)
are in `page.context.pages`. Here is is an example of a valid action:
```
frame = page.frame_locator(".result-frame")
button = frame.get_by_text("Submit")
button.click()
```
Here is another example:
```
frame = page.get_by_test_id("a").frame_locator(":scope")
frame.get_by_test_id("a776").click()
```
Note that Playwright's `get_by_test_id()` method is configured to use the `bid` attribute to locate HTML elements,
instead of the default `data-testid`. Also, Playwright's locators can not traverse iframes, so you have to locate
parent iframes first in order to locate an element in an iframe. The `bid` attribute contains all the information
required to recursively locate an element. For example, an element with `bid="ac2"` can be retrieved as follows:
```
frame = page.get_by_test_id("a").frame_locator(":scope")
frame = frame.get_by_test_id("ac").frame_locator(":scope")
elem = frame.get_by_test_id("ac2")
```
"""
    else:
        # only terminate the short description with a line break
        description += f"""\

"""
    if with_examples:
        description += f"""\
Here are other examples of valid actions:
```
page = page.context.new_page()
page.goto("https://www.wikipedia.org/")
```
```
page.get_by_label("Birth date").fill("2020-02-02")
page.get_by_role("link", name="Get started").click()
```
```
page.get_by_label('I agree to the terms above').check()
```
```
page.locator('#area').fill('Hello World!')
```
```
page.get_by_role("textbox").press("Control+ArrowRight")
```
```
send_message_to_user("There are 7 items to choose from.")
```
```
report_infeasible_instructions("I cannot follow these instructions because there is no email field in this form.")
```
"""

    return description
74
+
75
def example_action(self, abstract: bool) -> str:
    """
    Returns an example action as a string.

    Args:
        abstract: if True, return a sentence describing the expected answer
            format; otherwise return a concrete snippet of Playwright code.

    Returns:
        The example, as a string.
    """
    if abstract:
        return """\
One single bloc of Python code. Do not include any explanation, only valid Python code."""
    else:
        # frame "ba" is nested inside frame "b": it has to be located from the
        # parent frame locator, locators can not traverse iframes from `page`
        return """\
frame = page.get_by_test_id("b").frame_locator(":scope")
frame = frame.get_by_test_id("ba").frame_locator(":scope")
frame.get_by_test_id("ba2").fill("Hello world!")
frame.get_by_test_id("ba3").click()
"""
89
+
90
def to_python_code(self, action):
    """
    Converts the given code action string to browsergym-compatible playwright code.

    Markdown-style code snippets (``` or ```python fences) are extracted
    wherever they appear in the action, and joined with new lines. An action
    that contains no such snippet is returned unchanged.

    Args:
        action: the code action to parse.

    Returns:
        Executable playwright code that performs the action in a browsergym environment.
    """
    # extract markdown-style code snippets if detected; finditer() scans the
    # whole string, so snippets preceded by some text are found as well
    pattern = re.compile(r"```(?:python)?\n(?P<code>[\s\S]*?)```")
    snippets = [match.group("code") for match in pattern.finditer(action)]
    if snippets:
        return "\n".join(snippets)

    # otherwise just use the code action as is
    return action
BrowserGym/browsergym/core/src/browsergym/core/action/utils.py ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Literal
2
+
3
+ import playwright.sync_api
4
+
5
+
6
def get_elem_by_bid(
    page: playwright.sync_api.Page, bid: str, scroll_into_view: bool = False
) -> playwright.sync_api.Locator:
    """
    Parse the given bid to sequentially locate every nested frame leading to the bid, then
    locate the bid element. Bids are expected to take the form "abDb123", which means
    the element abDb123 is located inside frame abDb, which is located inside frame abD,
    which is located inside frame a, which is located inside the page's main frame.

    Args:
        page: the page in which the element is searched.
        bid: the browsergym id (playwright testid) of the page element.
        scroll_into_view: try to scroll element into view, unless it is completely visible.

    Returns:
        Playwright locator of the element.

    Raises:
        ValueError: if `bid` is not a string, or if a parent frame or the
            element itself can not be found.
    """
    if not isinstance(bid, str):
        raise ValueError(f"expected a string, got {repr(bid)}")

    current_frame = page

    # dive into each nested frame, to the frame where the element is located
    # (the loop stops once the remaining part of the bid is numeric, or empty)
    i = 0
    while bid[i:] and not bid[i:].isnumeric():
        i += 1
        # allow multi-character frame ids such as aA, bCD etc.
        while bid[i:] and bid[i].isalpha() and bid[i].isupper():
            i += 1
        frame_bid = bid[:i]  # bid of the next frame to select
        frame_elem = current_frame.get_by_test_id(frame_bid)
        if not frame_elem.count():
            raise ValueError(f'Could not find element with bid "{bid}"')
        if scroll_into_view:
            frame_elem.scroll_into_view_if_needed(timeout=500)
        # following lookups are performed inside this frame
        current_frame = frame_elem.frame_locator(":scope")

    # finally, we should have selected the frame where the target element is
    elem = current_frame.get_by_test_id(bid)
    if not elem.count():
        raise ValueError(f'Could not find element with bid "{bid}"')
    if scroll_into_view:
        elem.scroll_into_view_if_needed(timeout=500)
    return elem
50
+
51
+
52
def highlight_by_box(
    page: playwright.sync_api.Page, box: dict, color: Literal["blue", "red"] = "blue"
):
    """
    Highlights the target element based on its bounding box attributes.

    A temporary overlay is added on top of the page at the location of the
    box, with an animated shadow that fades out, and is then removed.
    Nothing happens when `box` is empty or None.

    Args:
        page: the page in which the overlay is drawn.
        box: bounding box with "x", "y", "width" and "height" entries.
        color: color of the highlight.
    """

    assert color in ("blue", "red")

    if box:
        left, top, width, height = box["x"], box["y"], box["width"], box["height"]
        # the style attribute is parsed as CSS text: property names must be
        # written in kebab-case there (camelCase only works on overlay.style)
        page.evaluate(
            f"""\
const overlay = document.createElement('div');
document.body.appendChild(overlay);
overlay.setAttribute('style', `
    all: initial;
    position: fixed;
    border: 2px solid transparent;  /* Start with transparent border */
    border-radius: 10px;  /* Add rounded corners */
    box-shadow: 0 0 0px {color};  /* Initial box-shadow with 0px spread */
    left: {left - 2}px;  /* Adjust left position to accommodate the border */
    top: {top - 2}px;  /* Adjust top position likewise */
    width: {width}px;
    height: {height}px;
    z-index: 2147483646; /* Maximum value - 1 */
    pointer-events: none; /* Ensure the overlay does not interfere with user interaction */
`);

// Animate the box-shadow to create a "wave" effect
let spread = 0;  // Initial spread radius of the box-shadow
const waveInterval = setInterval(() => {{
    spread += 10;  // Increase the spread radius to simulate the wave moving outward
    overlay.style.boxShadow = `0 0 40px ${{spread}}px {color}`;  // Update box-shadow to new spread radius
    overlay.style.opacity = 1 - spread / 38;  // Gradually decrease opacity to fade out the wave
    if (spread >= 38) {{
        clearInterval(waveInterval);  // Stop the animation once the maximum spread radius is reached
        document.body.removeChild(overlay);  // Remove the overlay from the document
    }}
}}, 200);  // Adjust the interval as needed to control the speed of the wave animation
"""
        )
        # Wait a bit to let users see the highlight
        page.wait_for_timeout(1000)  # Adjust delay as needed
94
+
95
+
96
def smooth_move_visual_cursor_to(
    page: playwright.sync_api.Page, x: float, y: float, speed: float = 400
):
    """
    Smoothly moves the visual cursor to a specific point, with constant
    movement speed.

    The cursor is an element created in the page on first use and stored in
    `window.browsergym_visual_cursor`. This function returns once the movement
    is expected to be over; the cursor then stays still for a moment in the
    page before it gets detached.

    Args:
        page: the page in which the cursor is displayed.
        x: target location X coordinate (in viewport pixels)
        y: target location Y coordinate (in viewport pixels)
        speed: cursor speed (in pixels per second)
    """
    # every variable of the script is declared locally (let / const), so that
    # overlapping animations do not share their state through implicit globals
    movement_time = page.evaluate(
        """\
([targetX, targetY, speed]) => {

    // create cursor if needed
    if (!("browsergym_visual_cursor" in window)) {
        if (window.trustedTypes && window.trustedTypes.createPolicy) {
            window.trustedTypes.createPolicy('default', {
                createHTML: (string, sink) => string
            });
        }
        let cursor = document.createElement('div');
        cursor.setAttribute('id', 'browsergym-visual-cursor');
        cursor.innerHTML = `
            <svg width="50px" height="50px" viewBox="213 106 713 706" fill="none" xmlns="http://www.w3.org/2000/svg">
            <path d="M213.333 106.667L426.667 853.333 512 512 853.333 426.667 213.333 106.667z" fill="blue"/>
            </svg>
`;
        cursor.setAttribute('style', `
            all: initial;
            position: fixed;
            opacity: 0.7; /* Slightly transparent */
            z-index: 2147483647; /* Maximum value */
            pointer-events: none; /* Ensures the SVG doesn't interfere with page interactions */
        `);

        // Calculate center position within the viewport
        const centerX = window.innerWidth / 2;
        const centerY = window.innerHeight / 2;

        cursor.style.left = `${centerX}px`;
        cursor.style.top = `${centerY}px`;

        // save cursor element
        window.browsergym_visual_cursor = cursor;
        window.browsergym_visual_cursor_n_owners = 0;
    }

    // recover cursor
    let cursor = window.browsergym_visual_cursor;

    // attach cursor to document
    document.body.appendChild(cursor);
    window.browsergym_visual_cursor_n_owners += 1;

    let x = parseFloat(cursor.style.left);
    let y = parseFloat(cursor.style.top);

    const dx = targetX - x;
    const dy = targetY - y;
    const dist = Math.hypot(dx, dy);
    const movement_time = (dist / speed) * 1000;  // seconds to milliseconds
    const still_wait_time = 1000;

    // Adjust steps based on distance to keep movement speed consistent
    // 1 step per 10 pixels of distance, adjust as needed
    const steps = Math.max(1, Math.trunc(dist / 10));

    const step_dx = dx / steps;
    const step_dy = dy / steps;
    const step_wait_time = Math.max(10, movement_time / steps);

    let step = 0;
    let time_still = 0;
    const cursorInterval = setInterval(() => {
        // move cursor
        if (step < steps) {
            x += step_dx;
            y += step_dy;
            cursor.style.left = `${x}px`;
            cursor.style.top = `${y}px`;
        }
        // still cursor (wait a bit)
        else if (time_still < still_wait_time) {
            time_still += step_wait_time;
        }
        // stop and detach cursor
        else {
            clearInterval(cursorInterval);
            window.browsergym_visual_cursor_n_owners -= 1;
            if (window.browsergym_visual_cursor_n_owners <= 0) {
                document.body.removeChild(cursor);
            }
        }
        step += 1;
    }, step_wait_time);

    return movement_time;
}""",
        [x, y, speed],
    )
    # wait for the movement to be over
    page.wait_for_timeout(movement_time)
202
+
203
+
204
def check_for_overlay(
    page: playwright.sync_api.Page, bid: str, element: playwright.sync_api.ElementHandle, box: dict
):
    """
    Checks if the given element is visible, i.e., not covered by another element.

    When the element has a "browsergym_visibility_ratio" attribute, the element
    is considered visible if that ratio is at least 0.5. Otherwise, the topmost
    element at each corner of the bounding box is looked up in the page, and the
    element is considered visible as soon as the HTML of one of these topmost
    elements contains the bid.

    Args:
        page: the page in which the element is located.
        bid: the browsergym id of the element.
        element: the element to check.
        box: bounding box of the element, with "x", "y", "width" and "height" entries.

    Returns:
        True if the element is considered visible, False otherwise (including
        when `element` is missing, or when there is no box to check).
    """
    if not element:
        return False

    visibility = element.get_attribute("browsergym_visibility_ratio")
    if visibility is not None:
        return float(visibility) >= 0.5

    if box:
        # corners
        points_to_check = [
            (box["x"], box["y"]),
            (box["x"] + box["width"], box["y"]),
            (box["x"], box["y"] + box["height"]),
            (box["x"] + box["width"], box["y"] + box["height"]),
        ]

        for x, y in points_to_check:
            # Execute JavaScript to find the topmost element at the point.
            top_element = page.evaluate(
                f"""() => {{
    const el = document.elementFromPoint({x}, {y});
    return el ? el.outerHTML : '';
}}"""
            )

            # Check if the topmost element is the element we're interested in.
            if top_element and bid in top_element:
                return True

    return False
239
+
240
+
241
def add_demo_mode_effects(
    page: playwright.sync_api.Page,
    elem: playwright.sync_api.ElementHandle,
    bid: str,
    demo_mode: Literal["off", "default", "all_blue", "only_visible_elements"],
    move_cursor: bool = True,
    highlight_box: bool = True,
):
    """
    Adds visual effects to the target element.

    Nothing happens when `demo_mode` is "off", or when the element has no
    bounding box. Otherwise the visual cursor is moved to the center of the
    element and / or the element is highlighted:
      - "default": blue highlight if the element is visible, red otherwise.
      - "all_blue": blue highlight.
      - "only_visible_elements": blue highlight, no effect at all if the
        element is not visible.

    Args:
        page: the page in which the element is located.
        elem: the target element.
        bid: the browsergym id of the target element.
        demo_mode: the kind of visual effects to apply.
        move_cursor: whether to move the visual cursor to the element.
        highlight_box: whether to highlight the bounding box of the element.
    """
    if demo_mode == "off":
        return

    box = elem.bounding_box()
    if box:
        center_x, center_y = box["x"] + box["width"] / 2, box["y"] + box["height"] / 2
        is_top_element = check_for_overlay(page, bid, elem, box)

        if demo_mode == "only_visible_elements":
            if not is_top_element:
                return
            else:
                color = "blue"

        elif demo_mode == "default":
            if is_top_element:
                color = "blue"
            else:
                color = "red"

        elif demo_mode == "all_blue":
            color = "blue"

        if move_cursor:
            smooth_move_visual_cursor_to(page, center_x, center_y)

        if highlight_box:
            highlight_by_box(page, box, color=color)
279
+
280
+
281
def call_fun(fun: callable, retry_with_force: bool):
    """
    Calls `fun(force=False)`, and optionally retries with `force=True` on timeout.

    Args:
        fun: the function to call, which accepts a `force` keyword argument.
        retry_with_force: if True, a playwright timeout during the first call
            triggers a second call with `force=True`. If False, the timeout
            error is propagated.
    """
    try:
        fun(force=False)
    except playwright.sync_api.TimeoutError:
        if not retry_with_force:
            raise
        fun(force=True)
BrowserGym/browsergym/core/src/browsergym/core/chat.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ from pathlib import Path
3
+ from typing import Literal
4
+ import logging
5
+ import playwright.sync_api
6
+ import re
7
+ import time
8
+
9
+ from importlib import resources
10
+
11
+ from . import _get_global_playwright, chat_files
12
+
13
+
14
+ CHATBOX_DIR = resources.files(chat_files)
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class Chat:
    """
    Chat window between the user and the assistant, displayed in its own browser.

    Messages are recorded in `self.messages`, as dicts with "role", "timestamp"
    and "message" entries.
    """

    def __init__(
        self, headless: bool, chat_size=(500, 800), record_video_dir=None, modern=True
    ) -> None:
        """
        Opens the chat window.

        Args:
            headless: whether the chat browser runs without a visible window.
            chat_size: (width, height) of the chat window, in pixels.
            record_video_dir: if set, a video of the chat is recorded in the
                "chat_video" sub-directory.
            modern: whether to load the modern chatbox rather than the classic one.
        """
        self.messages = []

        # create a new browser, browser context and page for the chat
        pw: playwright.sync_api.Playwright = _get_global_playwright()
        self.browser = pw.chromium.launch(
            headless=headless, args=[f"--window-size={chat_size[0]},{chat_size[1]}"]
        )
        self.context = self.browser.new_context(
            no_viewport=True,
            record_video_dir=Path(record_video_dir) / "chat_video" if record_video_dir else None,
            record_video_size=dict(width=chat_size[0], height=chat_size[1]),
        )
        self.page = self.context.new_page()
        self.recording_start_time = time.time() if record_video_dir else None

        # setup the chat page
        self.page.expose_function(
            "send_user_message", lambda msg: self._js_user_message_received_callback(msg=msg)
        )

        if modern:
            self.page.set_content(get_chatbox_modern(CHATBOX_DIR))
        else:
            self.page.set_content(get_chatbox_classic(CHATBOX_DIR))

    def _js_user_message_received_callback(self, msg: str):
        """Callback function for when a user message is received in the chatbox"""
        utc_time = time.time()
        self.messages.append({"role": "user", "timestamp": utc_time, "message": msg})
        # returning a list as JS doesnt like tuples
        return ["user", time.strftime("%H:%M", time.localtime(utc_time)), msg]

    def add_message(
        self, role: Literal["user", "user_image", "assistant", "info", "infeasible"], msg: str
    ):
        """
        Add a message to the chatbox and update the page accordingly.

        Messages with the "info" role are displayed but not recorded in
        `self.messages`.

        Raises:
            ValueError: if the role is invalid.
        """
        utc_time = time.time()
        if role not in ("user", "user_image", "assistant", "info", "infeasible"):
            raise ValueError(f"Invalid role: {role}")
        if role in ("user", "user_image", "assistant", "infeasible"):
            self.messages.append({"role": role, "timestamp": utc_time, "message": msg})
        timestamp = time.strftime("%H:%M:%S", time.localtime(utc_time))
        # hand the values over as an argument rather than formatting them into
        # the script: the repr() of a python string is not always a valid
        # JavaScript string literal
        self.page.evaluate(
            "([role, timestamp, msg]) => addChatMessage(role, timestamp, msg)",
            [role, timestamp, msg],
        )

    def wait_for_user_message(self):
        """Blocks until a user message is displayed in the chatbox."""
        logger.info("Waiting for message from user...")
        # reset flag
        self.page.evaluate("USER_MESSAGE_RECEIVED = false;")
        # wait for flag to be raised
        self.page.wait_for_function("USER_MESSAGE_RECEIVED", polling=100, timeout=0)
        logger.info("Message received.")

    def close(self):
        """Closes the browser context and the browser of the chat."""
        try:
            self.context.close()
        finally:
            # do not leave the browser open if the context fails to close
            self.browser.close()
78
+
79
+
80
def get_chatbox_modern(chatbox_dir) -> str:
    """
    Loads the HTML content of the modern chatbox.

    Args:
        chatbox_dir: directory which contains "chatbox_modern.html" (a path, or
            a package resources directory as returned by `resources.files`).

    Returns:
        The HTML content of the chatbox page.
    """
    # read through the directory object itself, and always decode as UTF-8
    # instead of relying on the default encoding of the platform
    return (chatbox_dir / "chatbox_modern.html").read_text(encoding="utf-8")
85
+
86
+
87
def get_chatbox_classic(chatbox_dir) -> str:
    """
    Loads the HTML content of the classic chatbox, with the picture of the
    assistant embedded in the page.

    Args:
        chatbox_dir: directory which contains "chatbox.html" and
            "assistant.png" (a path, or a package resources directory as
            returned by `resources.files`).

    Returns:
        The HTML content of the chatbox page, in which every
        "<ASSISTANT_IMAGE_URL>" placeholder is replaced with a base64 data URL
        of the picture.
    """
    # always decode as UTF-8 instead of relying on the platform default
    chatbox_html = (chatbox_dir / "chatbox.html").read_text(encoding="utf-8")
    image_base64 = base64.b64encode((chatbox_dir / "assistant.png").read_bytes()).decode("utf-8")

    assistant_image_url = f"data:image/png;base64,{image_base64}"
    # plain text substitution, no regular expression is needed here
    return chatbox_html.replace("<ASSISTANT_IMAGE_URL>", assistant_image_url)
BrowserGym/browsergym/core/src/browsergym/core/chat_files/chatbox.html ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>UI Assistant Chat</title>
8
+ <style>
9
+ .chat-container {
10
+ display: flex;
11
+ flex-flow: column;
12
+ position: fixed;
13
+ bottom: 0;
14
+ right: 0;
15
+ height: 100%;
16
+ width: 100%;
17
+ border: 1px solid black;
18
+ background-color: white;
19
+ padding: 0;
20
+ overflow: hidden;
21
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
22
+ font-family: 'Source Sans Pro', Arial, Helvetica, sans-serif;
23
+ }
24
+
25
+ .chat-header {
26
+ background-color: #032D42;
27
+ color: white;
28
+ padding: 5px;
29
+ padding-left: 15px;
30
+ text-align: center;
31
+ flex: 0 1 auto;
32
+ }
33
+
34
+ .chat-body {
35
+ padding: 10px;
36
+ overflow-y: auto;
37
+ display: flex;
38
+ flex-direction: column;
39
+ flex: 1 1 auto;
40
+ }
41
+
42
+ .chat-debug {
43
+ padding: 10px;
44
+ max-height: 30%;
45
+ overflow-y: auto;
46
+ display: flex;
47
+ flex-direction: column;
48
+ flex: 0 0 auto;
49
+ }
50
+
51
/* input row at the bottom of the chat: text area and Send button side by side */
.chat-input-area {
    display: flex;
    flex-flow: row;
    margin-top: 5px;
    padding: 10px;
    border-top: 1px solid #ddd;
    flex: 0 1 50px;
}
60
+
61
+ .chat-input-area form {
62
+ display: flex;
63
+ width: 100%;
64
+ height: 100%;
65
+ }
66
+
67
+ .input-box {
68
+ padding: 5px;
69
+ margin-right: 10px;
70
+ border-radius: 5px;
71
+ border: 1px solid #ccc;
72
+ width: 100%;
73
+ }
74
+
75
+ .submit-button {
76
+ padding: 5px 10px;
77
+ border-radius: 5px;
78
+ background-color: #4CAF50;
79
+ color: white;
80
+ border: none;
81
+ align-self: center;
82
+ }
83
+
84
+ .message {
85
+ display: flex;
86
+ align-items: center;
87
+ margin: 0px;
88
+ padding: 0px;
89
+ }
90
+
91
/* message bubble */
.message p {
    padding: 10px;
    /* Added padding inside the bubble */
    border-radius: 15px;
    flex-grow: 1;
    /* a non-zero length needs a unit, "10" alone is an invalid value */
    margin-top: 10px;
    margin-bottom: 0;
}
99
+
100
+ .chat-debug .message p {
101
+ padding: 0;
102
+ border-radius: 0;
103
+ flex-grow: 1;
104
+ margin-top: 0;
105
+ margin-bottom: 0;
106
+ }
107
+
108
+ .user-message {
109
+ background-color: #d1f4d1;
110
+ }
111
+
112
+ .assistant-message {
113
+ background-color: #e0e0e0;
114
+ }
115
+
116
+ .info-message {
117
+ background-color: #f0f0f0;
118
+ color: #707070;
119
+ font-size: 13px;
120
+ }
121
+
122
+ .assistant-image {
123
+ margin: 0px;
124
+ padding: 10px;
125
+ width: 40px;
126
+ }
127
+ </style>
128
+ </head>
129
+
130
+ <body>
131
+
132
+
133
+
134
+ <div class="chat-container">
135
+ <div class="chat-header">
136
+ <h2>BrowserGym</h2>
137
+ </div>
138
+ <div class="chat-body" id="chatBody"></div>
139
+ <div class="chat-debug" id="chatDebug"></div>
140
+ <div class="chat-input-area">
141
+ <form id="chatForm">
142
+ <textarea class="input-box" rows="2" id="inputBox"></textarea>
143
+ <input type="submit" class="submit-button" value="Send">
144
+ </form>
145
+ </div>
146
+ </div>
147
+
148
+ <script>
149
+
150
// picture shown next to the assistant messages; the placeholder is meant to be
// substituted with the actual image URL before the page is loaded
const assistant_image_data = "<ASSISTANT_IMAGE_URL>";

// flag raised by addChatMessage() each time a user message is displayed
var USER_MESSAGE_RECEIVED = false;
153
+
154
/**
 * Escapes the characters which have a special meaning in HTML (& < > " '),
 * so that the text can be inserted in the page as is.
 *
 * @param {string} unsafe - the text to escape
 * @returns {string} the escaped text
 */
function escapeHtml(unsafe) {
    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#039;',
    };
    return unsafe.replace(/[&<>"']/g, (character) => entities[character]);
}
162
+
163
/**
 * Displays a message in the chat window.
 *
 * Two call forms are supported:
 *   addChatMessage(role, timestamp, msg)  - form used when the timestamp of the message is provided
 *   addChatMessage(role, msg)             - form used by send_msg() in this page
 * The timestamp is accepted but not displayed.
 *
 * @param {string} role - "user", "assistant" or "info"
 * @param {string} timestamp - time of the message (or the message itself, in the two-argument form)
 * @param {string} [msg] - content of the message
 * @throws {TypeError} if the role is not supported
 */
function addChatMessage(role, timestamp, msg) {
    // two-argument form: the second argument is the message
    const content = msg === undefined ? timestamp : msg;

    const chatBody = document.getElementById('chatBody');
    const chatDebug = document.getElementById('chatDebug');
    const msgContainer = document.createElement('div');
    msgContainer.className = 'message';

    const text = document.createElement('p');
    text.innerHTML = escapeHtml(content);

    switch (role) {
        case "user":
            text.className = 'user-message';
            msgContainer.appendChild(text);
            chatBody.appendChild(msgContainer);
            break;
        case "assistant": {
            const assistantImg = document.createElement('img');
            assistantImg.src = assistant_image_data;
            assistantImg.alt = 'Assistant';
            assistantImg.className = 'assistant-image';

            text.className = 'assistant-message';
            msgContainer.appendChild(assistantImg); // Add the image to the message container
            msgContainer.appendChild(text);
            chatBody.appendChild(msgContainer);
            break;
        }
        case "info":
            text.className = 'info-message';
            // NOTE: info messages are inserted as raw HTML, they are not escaped
            text.innerHTML = content;
            msgContainer.appendChild(text);
            // hide previous debug messages
            for (const previous of chatDebug.children) {
                previous.style.display = 'none';
            }
            chatDebug.appendChild(msgContainer);
            break;
        default:
            throw new TypeError(`Illegal role "${role}".`);
    }

    // keep the latest message visible
    chatBody.scrollTop = chatBody.scrollHeight;

    if (role === "user") {
        USER_MESSAGE_RECEIVED = true;
    }
}
210
+
211
// fallback: define a no-op send_user_message() when none is available in the
// page, so that send_msg() can always call it
if (typeof send_user_message !== 'function') {
    function send_user_message(msg) {
        // This will be overloaded by playwright
    }
}
216
+
217
+ const inputBox = document.getElementById('inputBox');
218
+
219
/**
 * Sends a message typed by the user: the message is forwarded through
 * send_user_message(), displayed in the chat, and the input box is cleared.
 * Blank messages are ignored.
 *
 * @param {string} msg - content of the input box
 */
function send_msg(msg) {
    if (!msg.trim()) {
        return;
    }
    send_user_message(msg);
    addChatMessage('user', msg);
    inputBox.value = '';
}
226
+
227
// Enter sends the content of the input box, Shift+Enter is left to the
// default behavior of the text area
inputBox.onkeypress = (e) => {
    if (e.key === 'Enter' && !e.shiftKey) {
        e.preventDefault();
        send_msg(inputBox.value);
    }
};

// the Send button submits the form: send the message instead of letting the
// browser submit the form
document.getElementById('chatForm').onsubmit = function (event) {
    event.preventDefault();
    send_msg(inputBox.value);
    return false;
}
239
+ </script>
240
+
241
+ </body>
242
+
243
+ </html>