iruno committed on
Commit
c8b845e
·
verified ·
1 Parent(s): d0126c7

Delete BrowserGym

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. BrowserGym/.gitignore +0 -154
  2. BrowserGym/.pre-commit-config.yaml +0 -44
  3. BrowserGym/.readthedocs.yaml +0 -32
  4. BrowserGym/LICENSE +0 -13
  5. BrowserGym/Makefile +0 -17
  6. BrowserGym/README.md +0 -254
  7. BrowserGym/browsergym/assistantbench/README.md +0 -21
  8. BrowserGym/browsergym/assistantbench/pyproject.toml +0 -35
  9. BrowserGym/browsergym/assistantbench/requirements.txt +0 -4
  10. BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/__init__.py +0 -54
  11. BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_dicts.py +0 -68
  12. BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_factory.py +0 -28
  13. BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_numbers.py +0 -34
  14. BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_strings.py +0 -174
  15. BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/utils.py +0 -25
  16. BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluator.py +0 -132
  17. BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/task.py +0 -142
  18. BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/utils.py +0 -73
  19. BrowserGym/browsergym/browsergym.egg-info/PKG-INFO +0 -22
  20. BrowserGym/browsergym/browsergym.egg-info/SOURCES.txt +0 -6
  21. BrowserGym/browsergym/browsergym.egg-info/dependency_links.txt +0 -1
  22. BrowserGym/browsergym/browsergym.egg-info/requires.txt +0 -8
  23. BrowserGym/browsergym/browsergym.egg-info/top_level.txt +0 -1
  24. BrowserGym/browsergym/core/README.md +0 -10
  25. BrowserGym/browsergym/core/pyproject.toml +0 -42
  26. BrowserGym/browsergym/core/requirements.txt +0 -8
  27. BrowserGym/browsergym/core/src/browsergym/core/__init__.py +0 -27
  28. BrowserGym/browsergym/core/src/browsergym/core/__pycache__/__init__.cpython-311.pyc +0 -0
  29. BrowserGym/browsergym/core/src/browsergym/core/__pycache__/chat.cpython-311.pyc +0 -0
  30. BrowserGym/browsergym/core/src/browsergym/core/__pycache__/constants.cpython-311.pyc +0 -0
  31. BrowserGym/browsergym/core/src/browsergym/core/__pycache__/env.cpython-311.pyc +0 -0
  32. BrowserGym/browsergym/core/src/browsergym/core/__pycache__/observation.cpython-311.pyc +0 -0
  33. BrowserGym/browsergym/core/src/browsergym/core/__pycache__/registration.cpython-311.pyc +0 -0
  34. BrowserGym/browsergym/core/src/browsergym/core/__pycache__/spaces.cpython-311.pyc +0 -0
  35. BrowserGym/browsergym/core/src/browsergym/core/__pycache__/task.cpython-311.pyc +0 -0
  36. BrowserGym/browsergym/core/src/browsergym/core/action/__init__.py +0 -11
  37. BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/__init__.cpython-311.pyc +0 -0
  38. BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/base.cpython-311.pyc +0 -0
  39. BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/functions.cpython-311.pyc +0 -0
  40. BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/highlevel.cpython-311.pyc +0 -0
  41. BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/parsers.cpython-311.pyc +0 -0
  42. BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/utils.cpython-311.pyc +0 -0
  43. BrowserGym/browsergym/core/src/browsergym/core/action/base.py +0 -63
  44. BrowserGym/browsergym/core/src/browsergym/core/action/functions.py +0 -624
  45. BrowserGym/browsergym/core/src/browsergym/core/action/highlevel.py +0 -522
  46. BrowserGym/browsergym/core/src/browsergym/core/action/parsers.py +0 -92
  47. BrowserGym/browsergym/core/src/browsergym/core/action/python.py +0 -112
  48. BrowserGym/browsergym/core/src/browsergym/core/action/utils.py +0 -288
  49. BrowserGym/browsergym/core/src/browsergym/core/chat.py +0 -95
  50. BrowserGym/browsergym/core/src/browsergym/core/chat_files/chatbox.html +0 -243
BrowserGym/.gitignore DELETED
@@ -1,154 +0,0 @@
1
- .DS_store
2
- .idea/
3
- docs/src/generated/
4
-
5
- # Byte-compiled / optimized / DLL files
6
- __pycache__/
7
- *.py[cod]
8
- *$py.class
9
-
10
- # C extensions
11
- *.so
12
-
13
- # Distribution / packaging
14
- .Python
15
- build/
16
- develop-eggs/
17
- dist/
18
- downloads/
19
- eggs/
20
- .eggs/
21
- lib/
22
- lib64/
23
- parts/
24
- sdist/
25
- var/
26
- wheels/
27
- pip-wheel-metadata/
28
- share/python-wheels/
29
- *.egg-info/
30
- .installed.cfg
31
- *.egg
32
- MANIFEST
33
-
34
- # PyInstaller
35
- # Usually these files are written by a python script from a template
36
- # before PyInstaller builds the exe, so as to inject date/other infos into it.
37
- *.manifest
38
- *.spec
39
-
40
- # Installer logs
41
- pip-log.txt
42
- pip-delete-this-directory.txt
43
-
44
- # Unit test / coverage reports
45
- htmlcov/
46
- .tox/
47
- .nox/
48
- .coverage
49
- .coverage.*
50
- .cache
51
- nosetests.xml
52
- coverage.xml
53
- *.cover
54
- *.py,cover
55
- .hypothesis/
56
- .pytest_cache/
57
-
58
- # Translations
59
- *.mo
60
- *.pot
61
-
62
- # Django stuff:
63
- *.log
64
- local_settings.py
65
- db.sqlite3
66
- db.sqlite3-journal
67
-
68
- # Flask stuff:
69
- instance/
70
- .webassets-cache
71
-
72
- # Scrapy stuff:
73
- .scrapy
74
-
75
- # Sphinx documentation
76
- docs/_build/
77
-
78
- # PyBuilder
79
- target/
80
-
81
- # Jupyter Notebook
82
- .ipynb_checkpoints
83
-
84
- # IPython
85
- profile_default/
86
- ipython_config.py
87
-
88
- # pyenv
89
- .python-version
90
-
91
- # pipenv
92
- # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93
- # However, in case of collaboration, if having platform-specific dependencies or dependencies
94
- # having no cross-platform support, pipenv may install dependencies that don't work, or not
95
- # install all needed dependencies.
96
- #Pipfile.lock
97
-
98
- # PEP 582; used by e.g. github.com/David-OConnor/pyflow
99
- __pypackages__/
100
-
101
- # Celery stuff
102
- celerybeat-schedule
103
- celerybeat.pid
104
-
105
- # SageMath parsed files
106
- *.sage.py
107
-
108
- # Environments
109
- .env
110
- .venv
111
- env/
112
- venv/
113
- ENV/
114
- env.bak/
115
- venv.bak/
116
-
117
- # Spyder project settings
118
- .spyderproject
119
- .spyproject
120
-
121
- # Rope project settings
122
- .ropeproject
123
-
124
- # mkdocs documentation
125
- /site
126
-
127
- # mypy
128
- .mypy_cache/
129
- .dmypy.json
130
- dmypy.json
131
-
132
- # Pyre type checker
133
- .pyre/
134
-
135
- # error logs
136
- error_logs.txt
137
-
138
- # tests
139
- tests/results
140
- tmp.py
141
- .vscode/**
142
-
143
- # demo and results
144
- results/
145
-
146
- .vscode/launch.json
147
-
148
- # assistantbench
149
- tests/assistantbench/assistantbench-predictions-test.jsonl
150
-
151
- # weblinx
152
- bg_wl_data/
153
-
154
- uv.lock
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/.pre-commit-config.yaml DELETED
@@ -1,44 +0,0 @@
1
- fail_fast: false
2
-
3
- default_language_version:
4
- python: python3
5
-
6
- repos:
7
- - repo: https://github.com/pre-commit/pre-commit-hooks
8
- rev: v4.2.0
9
- hooks:
10
- - id: trailing-whitespace
11
- exclude: ^(.*)\.md$
12
- - id: end-of-file-fixer
13
- - id: check-yaml
14
- exclude: ^(.circleci/recipe|recipe) # conda build recipes are templated
15
- - id: check-added-large-files
16
- - repo: https://github.com/pocc/pre-commit-hooks
17
- rev: v1.1.1
18
- hooks:
19
- - id: clang-format
20
- args: [--style=file, -i]
21
- - id: clang-tidy
22
- args: [--fix, --fix-errors]
23
- - repo: https://github.com/psf/black
24
- rev: 24.2.0
25
- hooks:
26
- - id: black
27
- args: [--config=./pyproject.toml]
28
- - repo: https://github.com/asottile/blacken-docs
29
- rev: v1.12.1
30
- hooks:
31
- - id: blacken-docs
32
- args: [ '--line-length', '100' ]
33
- additional_dependencies: [black]
34
- - repo: https://github.com/Lucas-C/pre-commit-hooks
35
- rev: v1.5.5
36
- hooks:
37
- - id: forbid-crlf
38
- - id: remove-crlf
39
- # Black does not clear tabs in docstrings
40
- - id: forbid-tabs
41
- files: '.*\.py$'
42
- - id: remove-tabs
43
- files: '.*\.py$'
44
- args: [ '--whitespaces-count', '4' ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/.readthedocs.yaml DELETED
@@ -1,32 +0,0 @@
1
- # .readthedocs.yaml
2
- # Read the Docs configuration file
3
- # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4
-
5
- # Required
6
- version: 2
7
-
8
- # Set the OS, Python version and other tools you might need
9
- build:
10
- os: ubuntu-22.04
11
- tools:
12
- python: "3.12"
13
- # You can also specify other tool versions:
14
- # nodejs: "19"
15
- # rust: "1.64"
16
- # golang: "1.19"
17
-
18
- # Build documentation in the "docs/" directory with Sphinx
19
- sphinx:
20
- configuration: docs/src/conf.py
21
-
22
- # Optionally build your docs in additional formats such as PDF and ePub
23
- # formats:
24
- # - pdf
25
- # - epub
26
-
27
- # Optional but recommended, declare the Python requirements required
28
- # to build your documentation
29
- # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
30
- python:
31
- install:
32
- - requirements: docs/requirements.txt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/LICENSE DELETED
@@ -1,13 +0,0 @@
1
- Copyright 2024 ServiceNow
2
-
3
- Licensed under the Apache License, Version 2.0 (the "License");
4
- you may not use this file except in compliance with the License.
5
- You may obtain a copy of the License at
6
-
7
- http://www.apache.org/licenses/LICENSE-2.0
8
-
9
- Unless required by applicable law or agreed to in writing, software
10
- distributed under the License is distributed on an "AS IS" BASIS,
11
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- See the License for the specific language governing permissions and
13
- limitations under the License.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/Makefile DELETED
@@ -1,17 +0,0 @@
1
- install:
2
- @echo "--- 🚀 Installing project dependencies ---"
3
- pip install -e ./browsergym/core -e ./browsergym/miniwob -e ./browsergym/webarena -e ./browsergym/visualwebarena/ -e ./browsergym/experiments -e ./browsergym/assistantbench -e ./browsergym/
4
- playwright install chromium
5
-
6
- install-demo:
7
- @echo "--- 🚀 Installing demo dependencies ---"
8
- pip install -r demo_agent/requirements.txt
9
- playwright install chromium
10
-
11
- demo:
12
- @echo "--- 🚀 Running demo agent ---"
13
- (set -x && cd demo_agent && python run_demo.py)
14
-
15
- test-core:
16
- @echo "--- 🧪 Running tests ---"
17
- pytest -n auto ./tests/core
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/README.md DELETED
@@ -1,254 +0,0 @@
1
- <div align="center">
2
-
3
- ![BrowserGym banner](https://github.com/user-attachments/assets/4853f210-43ac-4107-a0d2-95c9c614dbe7)
4
-
5
- 🛠️ [Setup](#%EF%B8%8F-setup) -
6
- 🏋 [Usage](#-usage) -
7
- 💻 [Demo](#-demo) -
8
- 🌐 [Ecosystem](#-ecosystem) -
9
- 🚀 [AgentLab](https://github.com/ServiceNow/AgentLab) -
10
- 🌟 [Contributors](#-contributors) -
11
- 📄 [Paper](https://arxiv.org/abs/2412.05467) -
12
- 📝 [Citation](#-citing-this-work)
13
-
14
- [![pypi](https://badge.fury.io/py/browsergym.svg)](https://pypi.org/project/browsergym/)
15
- [![PyPI - License](https://img.shields.io/pypi/l/browsergym?style=flat-square)](http://www.apache.org/licenses/LICENSE-2.0)
16
- [![PyPI - Downloads](https://img.shields.io/pypi/dm/browsergym-core?style=flat-square)](https://pypistats.org/packages/browsergym-core)
17
- [![GitHub star chart](https://img.shields.io/github/stars/ServiceNow/BrowserGym?style=flat-square)](https://star-history.com/#ServiceNow/BrowserGym)
18
- [![Code Format](https://github.com/ServiceNow/BrowserGym/actions/workflows/code_format.yml/badge.svg)](https://github.com/ServiceNow/BrowserGym/actions/workflows/code_format.yml)
19
- [![Tests](https://github.com/ServiceNow/BrowserGym/actions/workflows/unit_tests.yml/badge.svg)](https://github.com/ServiceNow/BrowserGym/actions/workflows/unit_tests.yml)
20
-
21
- ```sh
22
- pip install browsergym
23
- ```
24
-
25
- </div>
26
-
27
- > [!WARNING]
28
- > BrowserGym is meant to provide an open, easy-to-use and extensible framework to accelerate the field of web agent research.
29
- > It is not meant to be a consumer product. Use with caution!
30
-
31
- > [!TIP]
32
- > 🚀 Check out [AgentLab](https://github.com/ServiceNow/AgentLab)✨ !
33
- > A seamless framework to implement, test, and evaluate your web agents on all BrowserGym benchmarks.
34
-
35
- https://github.com/ServiceNow/BrowserGym/assets/26232819/e0bfc788-cc8e-44f1-b8c3-0d1114108b85
36
-
37
- _Example of a GPT4-V agent executing openended tasks (top row, chat interactive), as well as WebArena and WorkArena tasks (bottom row)._
38
-
39
- BrowserGym includes the following benchmarks by default:
40
- - [MiniWoB](https://miniwob.farama.org/)
41
- - [WebArena](https://webarena.dev/)
42
- - [VisualWebArena](https://jykoh.com/vwa)
43
- - [WorkArena](https://github.com/ServiceNow/WorkArena)
44
- - [AssistantBench](https://github.com/oriyor/assistantbench)
45
- - [WebLINX](https://github.com/McGill-NLP/weblinx) (static benchmark)
46
-
47
- Designing new web benchmarks with BrowserGym is easy and simply requires inheriting from the [`AbstractBrowserTask`](https://github.com/ServiceNow/BrowserGym/blob/main/browsergym/core/src/browsergym/core/task.py#L7C7-L7C26) class.
48
-
49
- ## 🛠️ Setup
50
-
51
- To use browsergym, install one of the following packages:
52
- ```sh
53
- pip install browsergym # (recommended) everything below
54
- pip install browsergym-experiments # experiment utilities (agent, loop, benchmarks) + everything below
55
- pip install browsergym-core # core functionalities only (no benchmark, just the openended task)
56
- pip install browsergym-miniwob # core + miniwob
57
- pip install browsergym-webarena # core + webarena
58
- pip install browsergym-visualwebarena # core + visualwebarena
59
- pip install browsergym-workarena # core + workarena
60
- pip install browsergym-assistantbench # core + assistantbench
61
- pip install weblinx-browsergym # core + weblinx
62
- ```
63
-
64
- Then setup playwright by running
65
- ```sh
66
- playwright install chromium
67
- ```
68
-
69
- Finally, each benchmark comes with its own specific setup that requires following additional steps.
70
- - for MiniWoB++, see [miniwob/README.md](browsergym/miniwob/README.md)
71
- - for WebArena, see [webarena/README.md](browsergym/webarena/README.md)
72
- - for VisualWebArena, see [visualwebarena/README.md](browsergym/visualwebarena/README.md)
73
- - for WorkArena, see [WorkArena](https://github.com/ServiceNow/WorkArena)
74
- - for AssistantBench, see [assistantbench/README.md](browsergym/assistantbench/README.md)
75
-
76
- ### 🏗️ Development setup
77
-
78
- To install browsergym locally for development, use the following commands:
79
- ```sh
80
- git clone git@github.com:ServiceNow/BrowserGym.git
81
- cd BrowserGym
82
- make install
83
- ```
84
-
85
- Contributions are welcome! 😊
86
-
87
- ## 🏋 Usage
88
-
89
- Boilerplate code to run an agent on an interactive, open-ended task:
90
- ```python
91
- import gymnasium as gym
92
- import browsergym.core # register the openended task as a gym environment
93
-
94
- # start an openended environment
95
- env = gym.make(
96
- "browsergym/openended",
97
- task_kwargs={"start_url": "https://www.google.com/"}, # starting URL
98
- wait_for_user_message=True, # wait for a user message after each agent message sent to the chat
99
- )
100
- # run the environment <> agent loop until termination
101
- obs, info = env.reset()
102
- while True:
103
- action = ... # implement your agent here
104
- obs, reward, terminated, truncated, info = env.step(action)
105
- if terminated or truncated:
106
- break
107
- # release the environment
108
- env.close()
109
- ```
110
-
111
- MiniWoB
112
- ```python
113
- import gymnasium as gym
114
- import browsergym.miniwob # register miniwob tasks as gym environments
115
-
116
- # start a miniwob task
117
- env = gym.make("browsergym/miniwob.choose-list")
118
- ...
119
-
120
- # list all the available miniwob tasks
121
- env_ids = [id for id in gym.envs.registry.keys() if id.startswith("browsergym/miniwob")]
122
- print("\n".join(env_ids))
123
- ```
124
-
125
- WorkArena
126
- ```python
127
- import gymnasium as gym
128
- import browsergym.workarena # register workarena tasks as gym environments
129
-
130
- # start a workarena task
131
- env = gym.make("browsergym/workarena.servicenow.order-ipad-pro")
132
- ...
133
-
134
- # list all the available workarena tasks
135
- env_ids = [id for id in gym.envs.registry.keys() if id.startswith("browsergym/workarena")]
136
- print("\n".join(env_ids))
137
- ```
138
-
139
- WebArena
140
- ```python
141
- import gymnasium as gym
142
- import browsergym.webarena # register webarena tasks as gym environments
143
-
144
- # start a webarena task
145
- env = gym.make("browsergym/webarena.310")
146
- ...
147
-
148
- # list all the available webarena tasks
149
- env_ids = [id for id in gym.envs.registry.keys() if id.startswith("browsergym/webarena")]
150
- print("\n".join(env_ids))
151
- ```
152
-
153
- VisualWebArena
154
- ```python
155
- import gymnasium as gym
156
- import browsergym.visualwebarena # register visualwebarena tasks as gym environments
157
-
158
- # start a visualwebarena task
159
- env = gym.make("browsergym/visualwebarena.721")
160
- ...
161
-
162
- # list all the available visualwebarena tasks
163
- env_ids = [id for id in gym.envs.registry.keys() if id.startswith("browsergym/visualwebarena")]
164
- print("\n".join(env_ids))
165
- ```
166
-
167
- AssistantBench
168
- ```python
169
- import gymnasium as gym
170
- import browsergym.assistantbench # register assistantbench tasks as gym environments
171
-
172
- # start an assistantbench task
173
- env = gym.make("browsergym/assistantbench.validation.3")
174
- ...
175
-
176
- # list all the available assistantbench tasks
177
- env_ids = [id for id in gym.envs.registry.keys() if id.startswith("browsergym/assistantbench")]
178
- print("\n".join(env_ids))
179
- ```
180
-
181
- ## 💻 Demo
182
-
183
- If you want to experiment with a demo agent in BrowserGym, follow these steps
184
- ```sh
185
- # conda setup
186
- conda env create -f demo_agent/environment.yml
187
- conda activate demo_agent
188
-
189
- # or pip setup
190
- pip install -r demo_agent/requirements.txt
191
-
192
- # then download the browser for playwright
193
- playwright install chromium
194
- ```
195
-
196
- Our demo agent uses `openai` as a backend, be sure to set your `OPENAI_API_KEY`.
197
-
198
- Launch the demo agent as follows
199
- ```sh
200
- # openended (interactive chat mode)
201
- python demo_agent/run_demo.py --task_name openended --start_url https://www.google.com
202
-
203
- # miniwob
204
- python demo_agent/run_demo.py --task_name miniwob.click-test
205
-
206
- # workarena
207
- python demo_agent/run_demo.py --task_name workarena.servicenow.order-standard-laptop
208
-
209
- # webarena
210
- python demo_agent/run_demo.py --task_name webarena.4
211
-
212
- # visualwebarena
213
- python demo_agent/run_demo.py --task_name visualwebarena.398
214
- ```
215
-
216
- You can customize your experience by changing the `model_name` to your preferred LLM (it uses `gpt-4o-mini` by default), adding screenshots for your VLMs with `use_screenshot`, and much more!
217
-
218
- ```sh
219
- python demo_agent/run_demo.py --help
220
- ```
221
-
222
- ## 🌐 Ecosystem
223
-
224
- - [AgentLab](https://github.com/ServiceNow/AgentLab): Seamlessly run agents on benchmarks, collect and analyse traces.
225
- - [WorkArena(++)](https://github.com/ServiceNow/WorkArena): A benchmark for web agents on the ServiceNow platform.
226
- - [WebArena](https://github.com/web-arena-x/webarena): A benchmark of realistic web tasks on self-hosted domains.
227
- - [VisualWebArena](https://github.com/web-arena-x/visualwebarena): A benchmark of realistic visual web tasks on self-hosted domains.
228
- - [MiniWoB(++)](https://miniwob.farama.org/): A collection of over 100 web tasks on synthetic web pages.
229
- - [WebLINX](https://github.com/McGill-NLP/weblinx): A dataset of real-world web interaction traces.
230
- - [AssistantBench](https://github.com/oriyor/assistantbench): A benchmark of realistic and time-consuming tasks on the open web.
231
- - [DoomArena](https://github.com/ServiceNow/DoomArena): A framework for AI agent security testing which supports injecting attacks into web pages from Browsergym environments.
232
-
233
- ## 🌟 Contributors
234
-
235
- [![BrowserGym contributors](https://contrib.rocks/image?repo=ServiceNow/BrowserGym&max=2000)](https://github.com/ServiceNow/BrowserGym/graphs/contributors)
236
-
237
- ## 📝 Citing This Work
238
-
239
- Please use the following BibTeX to cite our work:
240
- ```tex
241
- @inproceedings{workarena2024,
242
- title = {{W}ork{A}rena: How Capable are Web Agents at Solving Common Knowledge Work Tasks?},
243
- author = {Drouin, Alexandre and Gasse, Maxime and Caccia, Massimo and Laradji, Issam H. and Del Verme, Manuel and Marty, Tom and Vazquez, David and Chapados, Nicolas and Lacoste, Alexandre},
244
- booktitle = {Proceedings of the 41st International Conference on Machine Learning},
245
- pages = {11642--11662},
246
- year = {2024},
247
- editor = {Salakhutdinov, Ruslan and Kolter, Zico and Heller, Katherine and Weller, Adrian and Oliver, Nuria and Scarlett, Jonathan and Berkenkamp, Felix},
248
- volume = {235},
249
- series = {Proceedings of Machine Learning Research},
250
- month = {21--27 Jul},
251
- publisher = {PMLR},
252
- url = {https://proceedings.mlr.press/v235/drouin24a.html},
253
- }
254
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/assistantbench/README.md DELETED
@@ -1,21 +0,0 @@
1
- # AssistantBench <> BrowserGym
2
-
3
- This package provides an implementation for using the [AssistantBench](https://assistantbench.github.io/) benchmark in BrowserGym.
4
-
5
- Because AssistantBench includes open-ended tasks, setup is extremely easy and simply requires installing the package.
6
-
7
- Please note that AssistantBench has a hidden test set, so test set predictions will need to be uploaded to the official [leaderboard](https://huggingface.co/spaces/AssistantBench/leaderboard).
8
-
9
- ## Setting up
10
-
11
- - Install the package (this is still a wip)
12
- ```
13
- pip install browsergym-assistantbench
14
- ```
15
-
16
- - Run inference, e.g., run the following commands for demo on a simple toy task
17
- ```
18
- python demo_agent/run_demo.py --task_name assistantbench.validation.3
19
- ```
20
-
21
- - Test set predictions will be saved to `./assistantbench-predictions-test.jsonl`. To evaluate on the official test set, upload these predictions to the official [leaderboard](https://huggingface.co/spaces/AssistantBench/leaderboard).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/assistantbench/pyproject.toml DELETED
@@ -1,35 +0,0 @@
1
- [build-system]
2
- requires = ["hatchling", "hatch-requirements-txt"]
3
- build-backend = "hatchling.build"
4
-
5
- [project]
6
- name = "browsergym-assistantbench"
7
- description = "AssistantBench benchmark for BrowserGym"
8
- authors = [
9
- {name = "Ori Yoran"},
10
- {name = "Maxime Gasse"},
11
- ]
12
- readme = "README.md"
13
- requires-python = ">3.7"
14
- license = {text = "Apache-2.0"}
15
- classifiers = [
16
- "Development Status :: 3 - Alpha",
17
- "Programming Language :: Python :: 3",
18
- "Operating System :: OS Independent",
19
- "Intended Audience :: Science/Research",
20
- "Topic :: Scientific/Engineering :: Artificial Intelligence",
21
- "License :: OSI Approved :: Apache Software License",
22
- ]
23
- dynamic = ["dependencies", "version"]
24
-
25
- [project.urls]
26
- homepage = "https://github.com/ServiceNow/BrowserGym"
27
-
28
- [tool.hatch.version]
29
- path = "../core/src/browsergym/core/__init__.py"
30
-
31
- [tool.hatch.metadata.hooks.requirements_txt]
32
- files = ["requirements.txt"]
33
-
34
- [tool.hatch.build.targets.wheel]
35
- packages = ["src/browsergym"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/assistantbench/requirements.txt DELETED
@@ -1,4 +0,0 @@
1
- browsergym-core==0.13.4
2
- datasets
3
- scipy
4
- numpy
 
 
 
 
 
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/__init__.py DELETED
@@ -1,54 +0,0 @@
1
- from browsergym.core.registration import register_task
2
-
3
- from . import task
4
-
5
- TOY_AB_TASK_IDS = []
6
- VALID_AB_TASK_IDS = []
7
- TEST_AB_TASK_IDS = []
8
-
9
-
10
- # register a toy easy task for testing implementation
11
- gym_id = f"assistantbench.imp.0"
12
- register_task(
13
- gym_id,
14
- task.AssistantBenchTask,
15
- task_kwargs={
16
- "task_id": f"imp.0",
17
- },
18
- default_task_kwargs={
19
- "save_predictions": False, # can be overriden
20
- },
21
- )
22
- TOY_AB_TASK_IDS.append(gym_id)
23
-
24
- # register the AssistantBench dev set
25
- for task_id in range(33):
26
- gym_id = f"assistantbench.validation.{task_id}"
27
- register_task(
28
- gym_id,
29
- task.AssistantBenchTask,
30
- task_kwargs={
31
- "task_id": f"validation.{task_id}",
32
- },
33
- default_task_kwargs={
34
- "save_predictions": False, # can be overriden
35
- },
36
- )
37
- VALID_AB_TASK_IDS.append(gym_id)
38
-
39
- # register the AssistantBench test set
40
- for task_id in range(181):
41
- gym_id = f"assistantbench.test.{task_id}"
42
- register_task(
43
- gym_id,
44
- task.AssistantBenchTask,
45
- task_kwargs={
46
- "task_id": f"test.{task_id}",
47
- },
48
- default_task_kwargs={
49
- "save_predictions": True, # can be overriden
50
- },
51
- )
52
- TEST_AB_TASK_IDS.append(gym_id)
53
-
54
- ALL_AB_TASK_IDS = TOY_AB_TASK_IDS + VALID_AB_TASK_IDS + TEST_AB_TASK_IDS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_dicts.py DELETED
@@ -1,68 +0,0 @@
1
- from typing import Dict, List
2
-
3
- import numpy as np
4
-
5
- from .utils import _align_bags
6
-
7
-
8
- def calculate_f1_score(precision, recall):
9
- if precision + recall == 0:
10
- return 0 # Handle the case to avoid division by zero
11
- return 2 * (precision * recall) / (precision + recall)
12
-
13
-
14
- def calc_recall(pred: Dict, gold: Dict, use_gold_for_eval: bool):
15
- from .evaluate_factory import get_evaluator_from_gold_answer
16
-
17
- recall = []
18
- for gold_key, gold_value in gold.items():
19
- pred_value = pred.get(gold_key)
20
- gold_value = fix_number(gold_value)
21
- pred_value = fix_number(pred_value)
22
- if gold_key not in pred:
23
- recall.append(0)
24
- else:
25
- evaluator = (
26
- get_evaluator_from_gold_answer(type(gold_value))
27
- if use_gold_for_eval
28
- else get_evaluator_from_gold_answer(type(pred_value))
29
- )
30
- if type(pred_value) != type(gold_value):
31
- recall.append(0)
32
- continue
33
- recall.append(evaluator(pred_value, gold_value))
34
- avg_recall = np.average(recall)
35
- return avg_recall
36
-
37
-
38
- def fix_number(number):
39
-
40
- if type(number) == str:
41
- copy_ans = number
42
- copy_ans = " ".join(
43
- " ".join(" ".join(copy_ans.split("$")).split("%")).split("sqft")
44
- ).strip()
45
- copy_ans = copy_ans.strip()
46
- copy_ans = copy_ans.replace(",", ".")
47
- try:
48
- return float(copy_ans)
49
- except:
50
- return number
51
- elif type(number) == int:
52
- return float(number)
53
- else:
54
- return number
55
-
56
-
57
- def evaluate_pair_of_dicts(pred: Dict, gold: Dict):
58
- recall = calc_recall(pred, gold, True)
59
- precision = calc_recall(gold, pred, False)
60
- f1 = calculate_f1_score(precision, recall)
61
- return f1
62
-
63
-
64
- def evaluate_dicts(pred: List[Dict], gold: List[Dict]):
65
- if not (type(pred) == dict or len(pred) == 0 or (type(pred) == list and type(pred[0]) == dict)):
66
- return 0
67
- max_alignment_scores = _align_bags(pred, gold, evaluate_pair_of_dicts)
68
- return np.average(max_alignment_scores)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_factory.py DELETED
@@ -1,28 +0,0 @@
1
- from typing import Union
2
-
3
- from .evaluate_dicts import evaluate_dicts
4
- from .evaluate_numbers import evaluate_numbers
5
- from .evaluate_strings import evaluate_strings
6
-
7
- EvaluatorFactory = {
8
- "string": evaluate_strings,
9
- "number": evaluate_numbers,
10
- "json": evaluate_dicts,
11
- "string list": evaluate_strings,
12
- }
13
-
14
- EvaluatorFactoryFromType = {
15
- str: evaluate_strings,
16
- int: evaluate_numbers,
17
- float: evaluate_numbers,
18
- bool: evaluate_strings,
19
- list: evaluate_strings,
20
- }
21
-
22
-
23
- def get_evaluator(evaluator: str):
24
- return EvaluatorFactory[evaluator]
25
-
26
-
27
- def get_evaluator_from_gold_answer(gold_answer: Union[str, int, float]):
28
- return EvaluatorFactoryFromType[gold_answer]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_numbers.py DELETED
@@ -1,34 +0,0 @@
1
- from typing import Union
2
-
3
- import numpy as np
4
-
5
-
6
- # Renamed calc_z function to distance_function_log
7
- def distance_function_log(pred: float, gold: float):
8
- if pred == gold == 0:
9
- return 1
10
- if pred == 0:
11
- pred = 1e-4
12
- if gold == 0:
13
- gold = 1e-4
14
- if pred > gold:
15
- return max(0, 1 - np.log(pred / gold))
16
- else:
17
- return max(0, 1 - np.log(gold / pred))
18
-
19
-
20
- def evaluate_numbers(pred: Union[float, str], gold: float):
21
- res = None
22
- if type(pred) != float and type(pred) != int:
23
- try:
24
- pred = float(pred)
25
- except ValueError:
26
- res = 0
27
- if type(gold) != float and type(gold) != int:
28
- try:
29
- gold = float(gold)
30
- except ValueError:
31
- res = 0
32
- if res is None:
33
- res = distance_function_log(pred, gold)
34
- return res
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_strings.py DELETED
@@ -1,174 +0,0 @@
1
- """
2
- Evaluation for two strings or list of strings.
3
- Code taken from the DROP benchmark - https://github.com/allenai/allennlp-reading-comprehension/blob/master/allennlp_rc/eval/drop_eval.py
4
- """
5
-
6
- import re
7
- import string
8
- from typing import List, Set, Tuple, Union
9
-
10
- import numpy as np
11
- from scipy.optimize import linear_sum_assignment
12
-
13
-
14
- # From here through _normalize_answer was originally copied from:
15
- # https://worksheets.codalab.org/rest/bundles/0x6b567e1cf2e041ec80d7098f031c5c9e/contents/blob/
16
- # Then cleaned up and modified a bit.
17
- def _remove_articles(text: str) -> str:
18
- regex = re.compile(r"\b(a|an|the)\b", re.UNICODE)
19
- return re.sub(regex, " ", text)
20
-
21
-
22
- def _white_space_fix(text: str) -> str:
23
- return " ".join(text.split())
24
-
25
-
26
- EXCLUDE = set(string.punctuation)
27
-
28
-
29
- def _remove_punc(text: str) -> str:
30
- if not _is_number(text):
31
- return "".join(ch for ch in text if ch not in EXCLUDE)
32
- else:
33
- return text
34
-
35
-
36
- def _lower(text: str) -> str:
37
- return text.lower()
38
-
39
-
40
- def _tokenize(text: str) -> List[str]:
41
- return re.split(" |-", text)
42
-
43
-
44
def _normalize_answer(text: str) -> str:
    """Lower text and remove punctuation, articles and extra whitespace.

    Each token from ``_tokenize`` is lowercased, stripped of punctuation,
    number-normalised, stripped of articles and whitespace-fixed, in that
    order. Tokens that end up empty are dropped and the rest are joined with
    single spaces.
    """
    cleaned = []
    for token in _tokenize(text):
        piece = _lower(token)
        piece = _remove_punc(piece)
        piece = _normalize_number(piece)
        piece = _remove_articles(piece)
        piece = _white_space_fix(piece)
        if piece.strip():
            cleaned.append(piece)
    return " ".join(cleaned).strip()
54
-
55
-
56
- def _is_number(text: str) -> bool:
57
- try:
58
- float(text)
59
- return True
60
- except ValueError:
61
- return False
62
-
63
-
64
- def _normalize_number(text: str) -> str:
65
- if _is_number(text):
66
- return str(float(text))
67
- else:
68
- return text
69
-
70
-
71
- def _answer_to_bags(
72
- answer: Union[str, List[str], Tuple[str, ...]]
73
- ) -> Tuple[List[str], List[Set[str]]]:
74
- if isinstance(answer, (list, tuple)):
75
- raw_spans = answer
76
- else:
77
- raw_spans = [answer]
78
- normalized_spans: List[str] = []
79
- token_bags = []
80
- for raw_span in raw_spans:
81
- normalized_span = _normalize_answer(raw_span)
82
- normalized_spans.append(normalized_span)
83
- token_bags.append(set(normalized_span.split()))
84
- return normalized_spans, token_bags
85
-
86
-
87
- def _align_bags(predicted: List[Set[str]], gold: List[Set[str]]) -> List[float]:
88
- """
89
- Takes gold and predicted answer sets and first finds the optimal 1-1 alignment
90
- between them and gets maximum metric values over all the answers.
91
- """
92
- scores = np.zeros([len(gold), len(predicted)])
93
- for gold_index, gold_item in enumerate(gold):
94
- for pred_index, pred_item in enumerate(predicted):
95
- if _match_numbers_if_present(gold_item, pred_item):
96
- scores[gold_index, pred_index] = _compute_f1(pred_item, gold_item)
97
- row_ind, col_ind = linear_sum_assignment(-scores)
98
-
99
- max_scores = np.zeros([max(len(gold), len(predicted))])
100
- for row, column in zip(row_ind, col_ind):
101
- max_scores[row] = max(max_scores[row], scores[row, column])
102
- return max_scores
103
-
104
-
105
- def _compute_f1(predicted_bag: Set[str], gold_bag: Set[str]) -> float:
106
- intersection = len(gold_bag.intersection(predicted_bag))
107
- if not predicted_bag:
108
- precision = 1.0
109
- else:
110
- precision = intersection / float(len(predicted_bag))
111
- if not gold_bag:
112
- recall = 1.0
113
- else:
114
- recall = intersection / float(len(gold_bag))
115
- f1 = (
116
- (2 * precision * recall) / (precision + recall)
117
- if not (precision == 0.0 and recall == 0.0)
118
- else 0.0
119
- )
120
- return f1
121
-
122
-
123
- def _match_numbers_if_present(gold_bag: Set[str], predicted_bag: Set[str]) -> bool:
124
- gold_numbers = set()
125
- predicted_numbers = set()
126
- for word in gold_bag:
127
- if _is_number(word):
128
- gold_numbers.add(word)
129
- for word in predicted_bag:
130
- if _is_number(word):
131
- predicted_numbers.add(word)
132
- if (not gold_numbers) or gold_numbers.intersection(predicted_numbers):
133
- return True
134
- return False
135
-
136
-
137
def get_metrics(
    predicted: Union[str, List[str], Tuple[str, ...]],
    gold: Union[str, List[str], Tuple[str, ...]],
) -> Tuple[float, float]:
    """
    Compute exact match and F1 for a prediction against a gold answer.

    Both arguments may be a single string or a list/tuple of strings. Exact
    match is 1.0 when the normalised spans are equal as sets and equal in
    count, otherwise 0.0. F1 is the mean of the aligned per-bag scores,
    rounded to two decimals.

    Returns:
        ``(exact_match, f1)``.
    """
    predicted_spans, predicted_token_bags = _answer_to_bags(predicted)
    gold_spans, gold_token_bags = _answer_to_bags(gold)

    spans_agree = set(predicted_spans) == set(gold_spans) and len(predicted_spans) == len(
        gold_spans
    )
    exact_match = 1.0 if spans_agree else 0.0

    per_bag_scores = _align_bags(predicted_token_bags, gold_token_bags)
    f1 = round(np.mean(per_bag_scores), 2)
    return exact_match, f1
160
-
161
-
162
def evaluate_strings(prediction, gold):
    """Return the mean aligned F1 between a prediction and a gold answer.

    Inputs that are neither ``list`` nor ``str`` are converted with ``str()``.
    Any exception raised while scoring yields 0.0.
    """
    if type(prediction) not in (list, str):
        prediction = str(prediction)
    if type(gold) not in (list, str):
        gold = str(gold)
    try:
        predicted_token_bags = _answer_to_bags(prediction)[1]
        gold_token_bags = _answer_to_bags(gold)[1]
        return np.mean(_align_bags(predicted_token_bags, gold_token_bags))
    except Exception:
        return 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/utils.py DELETED
@@ -1,25 +0,0 @@
1
- from typing import Callable, List, Set
2
-
3
- import numpy as np
4
- from scipy.optimize import linear_sum_assignment
5
-
6
-
7
- def _align_bags(
8
- predicted: List[Set[str]],
9
- gold: List[Set[str]],
10
- method: Callable[[object, object], float],
11
- ) -> List[float]:
12
- """
13
- Takes gold and predicted answer sets and first finds the optimal 1-1 alignment
14
- between them and gets maximum metric values over all the answers.
15
- """
16
- scores = np.zeros([len(gold), len(predicted)])
17
- for gold_index, gold_item in enumerate(gold):
18
- for pred_index, pred_item in enumerate(predicted):
19
- scores[gold_index, pred_index] = method(pred_item, gold_item)
20
- row_ind, col_ind = linear_sum_assignment(-scores)
21
-
22
- max_scores = np.zeros([max(len(gold), len(predicted))])
23
- for row, column in zip(row_ind, col_ind):
24
- max_scores[row] = max(max_scores[row], scores[row, column])
25
- return max_scores
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluator.py DELETED
@@ -1,132 +0,0 @@
1
- # todo export evaluation to a python package
2
-
3
- import json
4
-
5
- import numpy as np
6
-
7
- from .evaluate_utils.evaluate_factory import get_evaluator
8
-
9
-
10
def find_isnan(samp):
    """Report whether the sample is NaN.

    Values that ``np.isnan`` cannot handle (strings, None, ...) and results
    that have no single truth value are reported as not-NaN.

    Args:
        samp: Any value.

    Returns:
        True if ``np.isnan(samp)`` is truthy, otherwise False.
    """
    try:
        return bool(np.isnan(samp))
    except Exception:
        # Narrower than a bare except: KeyboardInterrupt and SystemExit
        # still propagate.
        return False
18
-
19
-
20
def fix_ans(answer):
    """Rewrite a single-quoted dict-like string so it parses as JSON.

    Only the quote patterns around braces, key/value separators and item
    separators are replaced. Input that does not support string replacement
    is returned unchanged.

    Args:
        answer: Usually a string such as ``"{'a': 'b'}"``.

    Returns:
        The rewritten string, or ``answer`` itself if it is not a string.
    """
    try:
        answer = (
            answer.replace("{'", '{"')
            .replace("', '", '", "')
            .replace("': '", '": "')
            .replace("'}", '"}')
        )
        # Keys followed by a non-string value (number, list, ...).
        answer = answer.replace("': ", '": ')
        return answer
    except (AttributeError, TypeError):
        # AttributeError: no .replace at all; TypeError: e.g. bytes.replace(str, str).
        return answer
32
-
33
-
34
def parse_answer(answer):
    """Classify a gold answer and convert it to the form its evaluator expects.

    Args:
        answer: List of answer entries (one per non-empty line of the gold
            answer).

    Returns:
        ``(parsed_answer, evaluator_name)`` where ``evaluator_name`` is one of
        ``"number"``, ``"json"``, ``"string"`` or ``"string list"``.
    """
    if len(answer) == 1:
        single = answer[0]
        ans, is_num = fix_number(single)
        if is_num:
            return ans, "number"
        try:
            return [json.loads(fix_ans(single))], "json"
        except Exception:
            # fix_number already rejected this entry above, so it is a plain string.
            return single, "string"

    try:
        return [json.loads(fix_ans(ex)) for ex in answer], "json"
    except Exception:
        return answer, "string list"
54
-
55
-
56
def fix_number(number):
    """Try to interpret a value as a number.

    For strings, the markers ``$``, ``%`` and ``sqft`` are replaced by spaces,
    commas become decimal points and the suffix ``" square kilometers"`` is
    removed before parsing with ``float()``.

    Args:
        number: A string, an int, or any other value.

    Returns:
        ``(value, is_number)``. Strings yield ``(float, True)`` when parsable
        and ``(original_string, False)`` otherwise. Ints are converted to
        float. Every other value is returned unchanged with ``True``.
    """
    if type(number) == str:
        cleaned = number.replace("$", " ").replace("%", " ").replace("sqft", " ").strip()
        cleaned = cleaned.replace(",", ".").replace(" square kilometers", "")
        try:
            return float(cleaned), True
        except ValueError:
            return number, False
    elif type(number) == int:
        return float(number), True
    else:
        return number, True
72
-
73
-
74
def fix_prediction(prediction, gold_answer, evaluator):
    """Normalise a prediction before scoring and decide whether to score it.

    Args:
        prediction: The model's answer (string, number, list, ...).
        gold_answer: The parsed gold answer.
        evaluator: Evaluator name chosen for the gold answer.

    Returns:
        ``(prediction, run_eval)``. ``run_eval`` is False for an empty
        prediction, and for a multi-item list when the gold answer is a float.
    """
    # A one-element list holding an int or a digit-only string is unwrapped.
    if (
        type(prediction) == list
        and len(prediction) == 1
        and (
            type(prediction[0]) == int
            or (type(prediction[0]) == str and prediction[0].isnumeric())
        )
    ):
        # fix_number returns (value, is_number); keep only the value.
        prediction, _ = fix_number(prediction[0])

    if type(prediction) != list:
        prediction, _ = fix_number(prediction)
        if evaluator == "json":
            try:
                # One JSON document per line.
                prediction = [json.loads(pred) for pred in prediction.split("\n")]
            except Exception:
                prediction = [prediction]

    if hasattr(type(prediction), "__len__") and len(prediction) == 0:
        return prediction, False

    if (type(prediction) == list and len(prediction) > 1) and type(gold_answer) == float:
        return prediction, False

    return prediction, True
100
-
101
-
102
def _is_missing_answer(value):
    """Return True for None, NaN, or an empty sized value (``""``, ``[]``, ...)."""
    if value is None or find_isnan(value):
        return True
    if type(value) in (float, int, bool):
        return False
    return hasattr(value, "__len__") and len(value) == 0


def question_scorer(prediction, gold_answer):
    """Score a prediction against a gold answer.

    The prediction is decoded as JSON when possible. The gold answer is split
    into non-empty lines (unless it is already a list), classified with
    ``parse_answer``, and the matching evaluator is applied.

    Args:
        prediction: Raw prediction, usually a string.
        gold_answer: Gold answer as a newline-separated string or a list.

    Returns:
        ``(accuracy, has_ans)``. ``has_ans`` is 0.0 when the prediction is
        empty, NaN or None (or a list of only such items), otherwise 1.0.
    """
    try:
        prediction = json.loads(prediction)
    except Exception:
        pass  # not JSON: score the raw prediction

    if prediction is None:
        # JSON "null" carries no answer and cannot be scored.
        return 0.0, 0.0

    if type(gold_answer) != list:
        answer_list = [x for x in gold_answer.split("\n") if len(x.strip()) > 0]
    else:
        answer_list = gold_answer
    gold_answer, evaluator = parse_answer(answer_list)
    prediction, run_eval = fix_prediction(prediction, gold_answer, evaluator)

    has_ans = 1.0
    if _is_missing_answer(prediction):
        has_ans = 0.0
    elif type(prediction) == list and all(_is_missing_answer(pred) for pred in prediction):
        has_ans = 0.0

    if not run_eval:
        return 0.0, has_ans

    metric_eval = get_evaluator(evaluator)
    accuracy = metric_eval(prediction, gold_answer)
    return accuracy, has_ans
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/task.py DELETED
@@ -1,142 +0,0 @@
1
- import logging
2
- import os
3
- from typing import Dict, Tuple
4
-
5
- from datasets import load_dataset
6
- from playwright.sync_api import Page
7
-
8
- from browsergym.core.task import AbstractBrowserTask
9
-
10
- from .evaluation.evaluator import question_scorer
11
- from .utils import add_prediction_to_jsonl
12
-
13
- logger = logging.getLogger(__name__)
14
-
15
- _DEFAULT_OUTPUT_FILE = None
16
-
17
-
18
def set_default_output_file(output_file: str):
    """Store the module-wide default output file path."""
    globals()["_DEFAULT_OUTPUT_FILE"] = output_file
21
-
22
-
23
def get_default_output_file():
    """Return the module-wide default output file path (None if never set)."""
    default_path = _DEFAULT_OUTPUT_FILE
    return default_path
25
-
26
-
27
- # Load dataset
28
-
29
- DATA_DATASET = "AssistantBench/AssistantBench"
30
- all_tasks = load_dataset(DATA_DATASET, trust_remote_code=True)
31
-
32
-
33
- # Extract answers and tasks for validation and test splits
34
def extract_data(split_name: str) -> Tuple[Dict[str, str], Dict[str, str], Dict[str, str]]:
    """Build answer, task and id lookups for one split of ``all_tasks``.

    Keys have the form ``"<split_name>.<row index>"``. A row whose answer is
    None gets the empty string as its answer.

    Returns:
        ``(answers, tasks, ids)`` dictionaries sharing the same keys.
    """
    answers: Dict[str, str] = {}
    questions: Dict[str, str] = {}
    identifiers: Dict[str, str] = {}
    for index, row in enumerate(all_tasks[split_name]):
        key = f"{split_name}.{index}"
        answers[key] = row["answer"] if row["answer"] is not None else ""
        questions[key] = row["task"]
        identifiers[key] = row["id"]
    return answers, questions, identifiers
43
-
44
-
45
- # Implementation data for testing
46
def get_implementation_testing_data() -> Tuple[Dict[str, str], Dict[str, str], Dict[str, str]]:
    """Return the single hard-coded ``imp.0`` entry used to test the implementation.

    Returns:
        ``(answers, tasks, ids)`` dictionaries, each with the one key ``"imp.0"``.
    """
    key = "imp.0"
    answers = {key: "20"}
    questions = {
        key: "What is the weather in Paris yesterday in Celsius? Answer with the number only."
    }
    identifiers = {key: "test_imp_id_0"}
    return answers, questions, identifiers
54
-
55
-
56
- # Combine dev, test, and implementation-specific testing splits
57
- gold_answers_dev, tasks_dev, ids_dev = extract_data("validation")
58
- gold_answers_test, tasks_test, ids_test = extract_data("test")
59
- gold_answers_impl_testing, tasks_test_impl_testing, ids_imp_testing = (
60
- get_implementation_testing_data()
61
- )
62
- gold_answers = {**gold_answers_dev, **gold_answers_test, **gold_answers_impl_testing}
63
- tasks = {**tasks_dev, **tasks_test, **tasks_test_impl_testing}
64
- ids = {**ids_dev, **ids_test, **ids_imp_testing}
65
-
66
-
67
class AssistantBenchTask(AbstractBrowserTask):
    """Browser task for a single AssistantBench question.

    The goal, gold answer and AssistantBench id are looked up in the
    module-level ``tasks``, ``gold_answers`` and ``ids`` tables. When
    ``save_predictions`` is set and an output file is known, the prediction is
    also recorded through ``add_prediction_to_jsonl``.
    """

    @classmethod
    def get_task_id(cls) -> str:
        """
        Generic class for several task ids, this way of obtaining the task id is not compatible for now.
        """
        raise NotImplementedError

    def __init__(
        self, seed: int, task_id: str, output_file: str = None, save_predictions: bool = False
    ) -> None:
        """
        Args:
            seed (int): Random seed for task initialization.
            task_id (str): Unique identifier for the task (for the BrowserGym environment).
            output_file (str, optional): Path to the output file for saving results, needed for test set.
            save_predictions (bool, optional): Save predictions to the output file (yes/no).
        """
        super().__init__(seed)
        self.locale = "en-US"
        self.timezone_id = "America/New_York"

        self.task_id = task_id
        self.start_url = "https://google.com"

        # All three tables share string keys, so use the same key for each lookup.
        task_key = str(self.task_id)
        self.goal = tasks[task_key]
        self.gold = gold_answers[task_key]
        self.ab_task_id = ids[task_key]
        self.save_predictions = save_predictions

        # Resolution order: constructor argument, then the module default,
        # then the ASSISTANTBENCH_OUTPUT_FILE environment variable.
        self.output_file = output_file
        if not self.output_file:
            self.output_file = get_default_output_file()
        if not self.output_file:
            self.output_file = os.getenv("ASSISTANTBENCH_OUTPUT_FILE", None)

        if self.save_predictions and self.output_file:
            logger.info(f"Task prediction will be written to output file {self.output_file}")

    def setup(self, page: Page) -> Tuple[str, dict]:
        """Open the start URL and, if saving, reserve this task's output entry.

        Returns:
            ``(goal, info)`` where ``info`` is an empty dict.
        """
        logger.info(f"Navigating to start url: {self.start_url}")
        page.goto(self.start_url, timeout=50000)
        if self.save_predictions and self.output_file:
            # create an empty task entry in the output file (will raise an Exception if the entry is already there)
            add_prediction_to_jsonl(
                file_path=self.output_file,
                task_id=self.ab_task_id,
                prediction="",
                override_if_exists=False,
            )
        return self.goal, {}

    def teardown(self) -> None:
        """Nothing to clean up."""
        pass

    def validate(self, page: Page, chat_messages: list[dict]) -> Tuple[float, bool, str, dict]:
        """Score the agent's answer once the last chat message is from the assistant.

        Returns:
            ``(accuracy, done, msg, info)``. Until the assistant has replied,
            this is ``(0.0, False, "", {})``.
        """
        accuracy, done, msg, info = 0.0, False, "", {}

        # eval when the agent returns a response
        if chat_messages and chat_messages[-1]["role"] == "assistant":
            done = True
            prediction = chat_messages[-1]["message"]
            if self.save_predictions and self.output_file:
                # update the task entry in the output file
                add_prediction_to_jsonl(
                    file_path=self.output_file,
                    task_id=self.ab_task_id,
                    prediction=prediction,
                    override_if_exists=True,
                )
            # The has-answer flag is not part of this method's return value.
            accuracy, _ = question_scorer(prediction, self.gold)

        return accuracy, done, msg, info
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/utils.py DELETED
@@ -1,73 +0,0 @@
1
- import json
2
- import logging
3
- import os
4
- import pathlib
5
- import time
6
-
7
- logger = logging.getLogger(__name__)
8
-
9
-
10
def add_prediction_to_jsonl(
    file_path: str, task_id: str, prediction: object, override_if_exists: bool
) -> None:
    """
    Multiprocessing-safe file write.

    Adds or updates the ``{"id": task_id, "answer": prediction}`` record in a
    JSON-lines file, guarded by a lock file next to it (same path with a
    ``.lock`` suffix). The lock is always released, including when an error
    is raised while the file is being read or written.

    Args:
        file_path: Path of the JSON-lines file (created if missing).
        task_id: Identifier of the record to add or update.
        prediction: JSON-serialisable answer to store.
        override_if_exists: Whether an existing record may be overwritten.

    Raises:
        RuntimeError: If the lock cannot be acquired within 10 seconds.
        ValueError: If the record exists and ``override_if_exists`` is False.
    """
    lock_file_path = pathlib.Path(file_path).with_suffix(".lock")
    lock_max_wait = 10  # 10 seconds

    # Acquire lock (atomic file creation)
    start_time = time.time()
    while True:
        try:
            fd = os.open(lock_file_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
            with os.fdopen(fd, "w") as f:
                f.write("lock")
            break
        except FileExistsError:
            # give up if max wait time reached
            seconds_waited = time.time() - start_time
            if seconds_waited >= lock_max_wait:
                raise RuntimeError(
                    f"Lock file could not be acquired after {seconds_waited} seconds ({lock_file_path})"
                )
            # wait for lock release
            logger.info(f"Waiting for lock file to be released: {lock_file_path}")
            time.sleep(1)  # 1 sec

    logger.info(f"Lock file acquired: {lock_file_path}")

    try:
        # Load existing data, if any
        data = []
        if os.path.exists(file_path):
            with open(file_path, "r") as f:
                data.extend([json.loads(line) for line in f if line.strip()])  # Skip empty lines

        # Check if task_id already exists
        existing_record = next((entry for entry in data if entry["id"] == task_id), None)

        # Add or update the record
        if not existing_record:
            data.append({"id": task_id, "answer": prediction})
        elif override_if_exists:
            existing_record["answer"] = prediction
        else:
            raise ValueError(
                f"Prediction for task ID {repr(task_id)} already exists in file {file_path}."
            )

        # Write data back to the file (this also creates it on first use)
        with open(file_path, "w") as f:
            for entry in data:
                f.write(json.dumps(entry) + "\n")
    finally:
        # Release lock (remove file) even on failure, so that an error here
        # does not block every later writer until its timeout.
        os.remove(lock_file_path)
        logger.info(f"Lock file released: {lock_file_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/browsergym.egg-info/PKG-INFO DELETED
@@ -1,22 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: browsergym
3
- Version: 0.13.4
4
- Summary: BrowserGym: a gym environment for web task automation in the Chromium browser
5
- Author: Rim Assouel, Léo Boisvert, Massimo Caccia, Alex Drouin, Maxime Gasse, Imene Kerboua, Alex Lacoste, Thibault Le Sellier De Chezelles, Tom Marty, Aman Jaiswal
6
- License: Apache-2.0
7
- Classifier: Development Status :: 3 - Alpha
8
- Classifier: Programming Language :: Python :: 3
9
- Classifier: Operating System :: OS Independent
10
- Classifier: Intended Audience :: Science/Research
11
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
12
- Classifier: License :: OSI Approved :: Apache Software License
13
- Requires-Python: >3.10
14
- Description-Content-Type: text/markdown
15
- Requires-Dist: browsergym-core==0.13.4
16
- Requires-Dist: browsergym-miniwob==0.13.4
17
- Requires-Dist: browsergym-webarena==0.13.4
18
- Requires-Dist: browsergym-visualwebarena==0.13.4
19
- Requires-Dist: browsergym-assistantbench==0.13.4
20
- Requires-Dist: browsergym-experiments==0.13.4
21
- Requires-Dist: browsergym-workarena>=0.4.1
22
- Requires-Dist: weblinx-browsergym>=0.0.2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/browsergym.egg-info/SOURCES.txt DELETED
@@ -1,6 +0,0 @@
1
- pyproject.toml
2
- browsergym.egg-info/PKG-INFO
3
- browsergym.egg-info/SOURCES.txt
4
- browsergym.egg-info/dependency_links.txt
5
- browsergym.egg-info/requires.txt
6
- browsergym.egg-info/top_level.txt
 
 
 
 
 
 
 
BrowserGym/browsergym/browsergym.egg-info/dependency_links.txt DELETED
@@ -1 +0,0 @@
1
-
 
 
BrowserGym/browsergym/browsergym.egg-info/requires.txt DELETED
@@ -1,8 +0,0 @@
1
- browsergym-core==0.13.4
2
- browsergym-miniwob==0.13.4
3
- browsergym-webarena==0.13.4
4
- browsergym-visualwebarena==0.13.4
5
- browsergym-assistantbench==0.13.4
6
- browsergym-experiments==0.13.4
7
- browsergym-workarena>=0.4.1
8
- weblinx-browsergym>=0.0.2
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/browsergym.egg-info/top_level.txt DELETED
@@ -1 +0,0 @@
1
-
 
 
BrowserGym/browsergym/core/README.md DELETED
@@ -1,10 +0,0 @@
1
- # BrowserGym core
2
-
3
- This package provides `browsergym.core`, which implements the core functionality of [BrowserGym](https://github.com/ServiceNow/BrowserGym).
4
-
5
- ## Setup
6
-
7
- 1. Install the package
8
- ```sh
9
- pip install browsergym-core
10
- ```
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/core/pyproject.toml DELETED
@@ -1,42 +0,0 @@
1
- [build-system]
2
- requires = ["hatchling", "hatch-requirements-txt"]
3
- build-backend = "hatchling.build"
4
-
5
- [project]
6
- name = "browsergym-core"
7
- description = "BrowserGym: a gym environment for web task automation in the Chromium browser"
8
- authors = [
9
- {name = "Rim Assouel"},
10
- {name = "Léo Boisvert"},
11
- {name = "Massimo Caccia"},
12
- {name = "Alex Drouin"},
13
- {name = "Maxime Gasse"},
14
- {name = "Imene Kerboua"},
15
- {name = "Alex Lacoste"},
16
- {name = "Thibault Le Sellier De Chezelles"},
17
- {name = "Tom Marty"},
18
- ]
19
- readme = "README.md"
20
- requires-python = ">3.9"
21
- license = {text = "Apache-2.0"}
22
- classifiers = [
23
- "Development Status :: 3 - Alpha",
24
- "Programming Language :: Python :: 3",
25
- "Operating System :: OS Independent",
26
- "Intended Audience :: Science/Research",
27
- "Topic :: Scientific/Engineering :: Artificial Intelligence",
28
- "License :: OSI Approved :: Apache Software License",
29
- ]
30
- dynamic = ["dependencies", "version"]
31
-
32
- [project.urls]
33
- homepage = "https://github.com/ServiceNow/BrowserGym"
34
-
35
- [tool.hatch.version]
36
- path = "src/browsergym/core/__init__.py"
37
-
38
- [tool.hatch.metadata.hooks.requirements_txt]
39
- files = ["requirements.txt"]
40
-
41
- [tool.hatch.build.targets.wheel]
42
- packages = ["src/browsergym"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/core/requirements.txt DELETED
@@ -1,8 +0,0 @@
1
- playwright==1.44
2
- gymnasium>=0.27
3
- numpy>=1.14
4
- pyparsing>=3
5
- Pillow>=10.1
6
- beautifulsoup4>=4.12
7
- lxml>=4.9
8
- mcp[cli]>=1.6.0
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/core/src/browsergym/core/__init__.py DELETED
@@ -1,27 +0,0 @@
1
- __version__ = "0.13.4"
2
-
3
- import playwright.sync_api
4
-
5
- # we use a global playwright instance
6
- _PLAYWRIGHT = None
7
-
8
-
9
def _set_global_playwright(pw: playwright.sync_api.Playwright):
    """Store the given Playwright object as the module-wide instance."""
    globals()["_PLAYWRIGHT"] = pw
12
-
13
-
14
- def _get_global_playwright():
15
- global _PLAYWRIGHT
16
- if not _PLAYWRIGHT:
17
- pw = playwright.sync_api.sync_playwright().start()
18
- _set_global_playwright(pw)
19
-
20
- return _PLAYWRIGHT
21
-
22
-
23
- # register the open-ended task
24
- from .registration import register_task
25
- from .task import OpenEndedTask
26
-
27
- register_task(OpenEndedTask.get_task_id(), OpenEndedTask)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/__init__.cpython-311.pyc DELETED
Binary file (1.14 kB)
 
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/chat.cpython-311.pyc DELETED
Binary file (6.89 kB)
 
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/constants.cpython-311.pyc DELETED
Binary file (428 Bytes)
 
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/env.cpython-311.pyc DELETED
Binary file (31.2 kB)
 
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/observation.cpython-311.pyc DELETED
Binary file (22.7 kB)
 
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/registration.cpython-311.pyc DELETED
Binary file (3.49 kB)
 
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/spaces.cpython-311.pyc DELETED
Binary file (8.42 kB)
 
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/task.cpython-311.pyc DELETED
Binary file (5.53 kB)
 
BrowserGym/browsergym/core/src/browsergym/core/action/__init__.py DELETED
@@ -1,11 +0,0 @@
1
- _DEMO_MODE = False
2
-
3
-
4
def set_global_demo_mode(demo_mode: bool):
    """Store the module-wide demo mode flag."""
    globals()["_DEMO_MODE"] = demo_mode
7
-
8
-
9
def get_global_demo_mode():
    """Return the module-wide demo mode flag."""
    current_mode = _DEMO_MODE
    return current_mode
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/__init__.cpython-311.pyc DELETED
Binary file (561 Bytes)
 
BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/base.cpython-311.pyc DELETED
Binary file (3.12 kB)
 
BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/functions.cpython-311.pyc DELETED
Binary file (26.2 kB)
 
BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/highlevel.cpython-311.pyc DELETED
Binary file (12.4 kB)
 
BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/parsers.cpython-311.pyc DELETED
Binary file (6.82 kB)
 
BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/utils.cpython-311.pyc DELETED
Binary file (12.2 kB)
 
BrowserGym/browsergym/core/src/browsergym/core/action/base.py DELETED
@@ -1,63 +0,0 @@
1
- from abc import ABC, abstractmethod
2
-
3
- import playwright.sync_api
4
-
5
- from . import get_global_demo_mode
6
-
7
-
8
class AbstractActionSet(ABC):
    """Base class for action sets; subclasses implement the three abstract methods."""

    def __init__(self, strict: bool = False):
        # Stored only; this base class does not read the flag itself.
        self.strict = strict

    @abstractmethod
    def describe(self, with_long_description: bool = True, with_examples: bool = True) -> str:
        """
        Produce a text description of the action space.
        """

    @abstractmethod
    def example_action(self, abstract: bool) -> str:
        """
        Produce one example action, as a string.
        """

    @abstractmethod
    def to_python_code(self, action) -> str:
        """
        Translate an action into browsergym-compatible python code.

        Args:
            action: the action to translate.

        Returns:
            Python code that, when executed in a browsergym environment, performs the action.
        """
35
-
36
-
37
def execute_python_code(
    code: str,
    page: playwright.sync_api.Page,
    send_message_to_user: callable,
    report_infeasible_instructions: callable,
):
    """
    Run Python code with ``exec`` in a fresh namespace that exposes only a
    playwright `page` object, the two callbacks below, and `DEMO_MODE`.

    WARNING: this is not safe!
    https://stackoverflow.com/questions/77655440/can-you-protect-a-python-variable-with-exec

    Args:
        code: the Python code to run, as a string.
        page: the playwright page the code can access as `page`.
        send_message_to_user: callback the code can access under the same name. It should take one text argument.
        report_infeasible_instructions: callback the code can access under the same name. It should take one text argument.
    """
    exec_namespace = {}
    exec_namespace["page"] = page
    exec_namespace["send_message_to_user"] = send_message_to_user
    exec_namespace["report_infeasible_instructions"] = report_infeasible_instructions
    exec_namespace["DEMO_MODE"] = get_global_demo_mode()

    exec(code, exec_namespace)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/core/src/browsergym/core/action/functions.py DELETED
@@ -1,624 +0,0 @@
1
- # these are placeholders
2
- # all these symbols will be available in browsergym actions
3
- from typing import Literal
4
-
5
- import playwright.sync_api
6
-
7
- from .utils import (
8
- add_demo_mode_effects,
9
- call_fun,
10
- get_elem_by_bid,
11
- highlight_by_box,
12
- smooth_move_visual_cursor_to,
13
- )
14
-
15
- page: playwright.sync_api.Page = None
16
- send_message_to_user: callable = None
17
- report_infeasible_instructions: callable = None
18
- demo_mode: Literal["off", "default", "all_blue", "only_visible_elements"] = None
19
- retry_with_force: bool = False
20
-
21
- """IMPORTANT
22
- The following primitives are meant to be included in the browsergym action using
23
- inspect.getsource().
24
- """
25
-
26
-
27
def send_msg_to_user(text: str):
    """
    Sends a message to the user.

    Examples:
        send_msg_to_user("Based on the results of my search, the city was built in 1751.")
    """
    # Thin wrapper around the module-level `send_message_to_user` callback.
    send_message_to_user(text)
35
-
36
-
37
def report_infeasible(reason: str):
    """
    Notifies the user that their instructions are infeasible.

    Examples:
        report_infeasible("I cannot follow these instructions because there is no email field in this form.")
    """
    # Thin wrapper around the module-level `report_infeasible_instructions` callback.
    report_infeasible_instructions(reason)
45
-
46
-
47
def noop(wait_ms: float = 1000):
    """
    Do nothing, and optionally wait for the given time (in milliseconds).

    Examples:
        noop()
        noop(500)
    """
    # The only effect is the wait performed by the active page.
    page.wait_for_timeout(wait_ms)
56
-
57
-
58
- # https://playwright.dev/docs/input#text-input
59
def fill(bid: str, value: str):
    """
    Fill out a form field. It focuses the element and triggers an input event with the entered text.
    It works for <input>, <textarea> and [contenteditable] elements.

    Examples:
        fill('237', 'example value')
        fill('45', "multi-line\\nexample")
        fill('a12', "example with \\"quotes\\"")
    """
    elem = get_elem_by_bid(page, bid, demo_mode != "off")
    add_demo_mode_effects(page, elem, bid, demo_mode=demo_mode, move_cursor=False)

    def do(force: bool):
        if demo_mode != "off":
            # Demo mode types the text key by key so the input is visible.
            elem.clear(force=force, timeout=500)
            # An empty value just empties the field. Skipping the typing step
            # also avoids dividing by len(value) == 0.
            if value:
                # Spread the typing over ~2 seconds, at least 10 ms per key.
                delay = max(2000 / len(value), 10)
                elem.type(value, delay=delay, timeout=0)  # no timeout
        else:
            elem.fill(value, force=force, timeout=500)

    call_fun(do, retry_with_force)
81
-
82
-
83
- # https://playwright.dev/python/docs/api/class-locator#locator-check
84
def check(bid: str):
    """
    Ensure a checkbox or radio element is checked.

    Examples:
        check('55')
    """
    target = get_elem_by_bid(page, bid, demo_mode != "off")
    add_demo_mode_effects(page, target, bid, demo_mode=demo_mode, move_cursor=True)

    # call_fun receives the action as a callable taking the `force` flag.
    call_fun(lambda force: target.check(force=force, timeout=500), retry_with_force)
98
-
99
-
100
- # https://playwright.dev/python/docs/api/class-locator#locator-uncheck
101
def uncheck(bid: str):
    """
    Ensure a checkbox or radio element is unchecked.

    Examples:
        uncheck('a5289')
    """
    target = get_elem_by_bid(page, bid, demo_mode != "off")
    add_demo_mode_effects(page, target, bid, demo_mode=demo_mode, move_cursor=True)

    # call_fun receives the action as a callable taking the `force` flag.
    call_fun(lambda force: target.uncheck(force=force, timeout=500), retry_with_force)
115
-
116
-
117
- # https://playwright.dev/docs/input#select-options
118
def select_option(bid: str, options: str | list[str]):
    """
    Select one or multiple options in a <select> element. You can specify
    option value or label to select. Multiple options can be selected.

    Examples:
        select_option('a48', "blue")
        select_option('c48', ["red", "green", "blue"])
    """
    target = get_elem_by_bid(page, bid, demo_mode != "off")
    add_demo_mode_effects(page, target, bid, demo_mode=demo_mode, move_cursor=False)

    # call_fun receives the action as a callable taking the `force` flag.
    call_fun(
        lambda force: target.select_option(options, force=force, timeout=500),
        retry_with_force,
    )
134
-
135
-
136
- # https://playwright.dev/python/docs/api/class-locator#locator-click
137
def click(
    bid: str,
    button: Literal["left", "middle", "right"] = "left",
    modifiers: list[Literal["Alt", "Control", "ControlOrMeta", "Meta", "Shift"]] = [],
):
    """
    Click an element.

    Examples:
        click('a51')
        click('b22', button="right")
        click('48', button="middle", modifiers=["Shift"])
    """
    # NOTE: the `[]` default is shared between calls; it is only read here, never mutated.
    target = get_elem_by_bid(page, bid, demo_mode != "off")
    add_demo_mode_effects(page, target, bid, demo_mode=demo_mode, move_cursor=True)

    # call_fun receives the action as a callable taking the `force` flag.
    call_fun(
        lambda force: target.click(button=button, modifiers=modifiers, force=force, timeout=500),
        retry_with_force,
    )
157
-
158
-
159
- # https://playwright.dev/python/docs/api/class-locator#locator-dblclick
160
def dblclick(
    bid: str,
    button: Literal["left", "middle", "right"] = "left",
    modifiers: list[Literal["Alt", "Control", "ControlOrMeta", "Meta", "Shift"]] = [],
):
    """
    Double click an element.

    Examples:
        dblclick('12')
        dblclick('ca42', button="right")
        dblclick('178', button="middle", modifiers=["Shift"])
    """
    # NOTE: the `[]` default is shared between calls; it is only read here, never mutated.
    elem = get_elem_by_bid(page, bid, demo_mode != "off")
    add_demo_mode_effects(page, elem, bid, demo_mode=demo_mode, move_cursor=True)

    def do(force: bool):
        # Must be dblclick(): a plain click() only performs a single click.
        elem.dblclick(button=button, modifiers=modifiers, force=force, timeout=500)

    call_fun(do, retry_with_force)
180
-
181
-
182
- # https://playwright.dev/python/docs/api/class-locator#locator-hover
183
def hover(bid: str):
    """
    Hover over an element.

    Examples:
        hover('b8')
    """
    target = get_elem_by_bid(page, bid, demo_mode != "off")
    # Here the demo effects are requested with highlight_box=False.
    add_demo_mode_effects(
        page, target, bid, demo_mode=demo_mode, move_cursor=True, highlight_box=False
    )

    # call_fun receives the action as a callable taking the `force` flag.
    call_fun(lambda force: target.hover(force=force, timeout=500), retry_with_force)
199
-
200
-
201
- # https://playwright.dev/python/docs/input#keys-and-shortcuts
202
def press(bid: str, key_comb: str):
    """
    Focus the matching element and press a combination of keys. It accepts
    the logical key names that are emitted in the keyboardEvent.key property
    of the keyboard events: Backquote, Minus, Equal, Backslash, Backspace,
    Tab, Delete, Escape, ArrowDown, End, Enter, Home, Insert, PageDown, PageUp,
    ArrowRight, ArrowUp, F1 - F12, Digit0 - Digit9, KeyA - KeyZ, etc. You can
    alternatively specify a single character you'd like to produce such as "a"
    or "#". Following modification shortcuts are also supported: Shift, Control,
    Alt, Meta, ShiftLeft, ControlOrMeta. ControlOrMeta resolves to Control on
    Windows and Linux and to Meta on macOS.

    Examples:
        press('88', 'Backspace')
        press('a26', 'ControlOrMeta+a')
        press('a61', 'Meta+Shift+t')
    """
    target = get_elem_by_bid(page, bid, demo_mode != "off")
    add_demo_mode_effects(page, target, bid, demo_mode=demo_mode, move_cursor=False)
    # Called directly: unlike click/fill, this action does not go through call_fun.
    target.press(key_comb, timeout=500)
222
-
223
-
224
- # https://playwright.dev/python/docs/api/class-locator#locator-focus
225
def focus(bid: str):
    """
    Focus the matching element.

    Examples:
        focus('b455')
    """
    target = get_elem_by_bid(page, bid, demo_mode != "off")
    add_demo_mode_effects(page, target, bid, demo_mode=demo_mode, move_cursor=False)
    # Called directly: this action does not go through call_fun.
    target.focus(timeout=500)
235
-
236
-
237
- # https://playwright.dev/python/docs/api/class-locator#locator-clear
238
def clear(bid: str):
    """
    Clear the input field.

    Examples:
        clear('996')
    """
    target = get_elem_by_bid(page, bid, demo_mode != "off")
    add_demo_mode_effects(page, target, bid, demo_mode=demo_mode, move_cursor=False)
    # Called directly: this action does not go through call_fun.
    target.clear(timeout=500)
248
-
249
-
250
- # https://playwright.dev/python/docs/input#drag-and-drop
251
def drag_and_drop(from_bid: str, to_bid: str):
    """
    Perform a drag & drop. Hover the element that will be dragged. Press
    left mouse button. Move mouse to the element that will receive the
    drop. Release left mouse button.

    Examples:
        drag_and_drop('56', '498')
    """
    show_effects = demo_mode != "off"

    # Step 1: hover the source element and press the mouse button.
    source = get_elem_by_bid(page, from_bid, show_effects)
    add_demo_mode_effects(page, source, from_bid, demo_mode=demo_mode, move_cursor=True)
    source.hover(timeout=500)
    page.mouse.down()

    # Step 2: hover the destination element and release the mouse button.
    destination = get_elem_by_bid(page, to_bid, show_effects)
    add_demo_mode_effects(page, destination, to_bid, demo_mode=demo_mode, move_cursor=True)
    destination.hover(timeout=500)
    page.mouse.up()
269
-
270
-
271
- # https://playwright.dev/python/docs/api/class-mouse#mouse-wheel
272
def scroll(delta_x: float, delta_y: float):
    """
    Scroll horizontally and vertically. Amounts in pixels, positive for right or down scrolling, negative for left or up scrolling. Dispatches a wheel event.

    Examples:
        scroll(0, 200)
        scroll(-50.2, -100.5)
    """
    # Forwarded unchanged to the active page's mouse wheel.
    page.mouse.wheel(delta_x, delta_y)
281
-
282
-
283
- # https://playwright.dev/python/docs/api/class-mouse#mouse-move
284
def mouse_move(x: float, y: float):
    """
    Move the mouse to a location. Uses absolute client coordinates in pixels.
    Dispatches a mousemove event.

    Examples:
        mouse_move(65.2, 158.5)
    """
    show_effects = demo_mode != "off"
    if show_effects:
        # Move the visual cursor first, then perform the real move.
        smooth_move_visual_cursor_to(page, x, y)
    page.mouse.move(x, y)
295
-
296
-
297
- # https://playwright.dev/python/docs/api/class-mouse#mouse-up
298
def mouse_up(x: float, y: float, button: Literal["left", "middle", "right"] = "left"):
    """
    Move the mouse to a location then release a mouse button. Dispatches
    mousemove and mouseup events.

    Examples:
        mouse_up(250, 120)
        mouse_up(47, 252, 'right')
    """
    show_effects = demo_mode != "off"
    if show_effects:
        smooth_move_visual_cursor_to(page, x, y)
        # Highlight a 1x1 box at the target coordinates.
        highlight_by_box(page, dict(x=x, y=y, width=1, height=1))
    page.mouse.move(x, y)
    page.mouse.up(button=button)
312
-
313
-
314
- # https://playwright.dev/python/docs/api/class-mouse#mouse-down
315
def mouse_down(x: float, y: float, button: Literal["left", "middle", "right"] = "left"):
    """
    Move the mouse to a location then press and hold a mouse button. Dispatches
    mousemove and mousedown events.

    Examples:
        mouse_down(140.2, 580.1)
        mouse_down(458, 254.5, 'middle')
    """
    show_effects = demo_mode != "off"
    if show_effects:
        smooth_move_visual_cursor_to(page, x, y)
        # Highlight a 1x1 box at the target coordinates.
        highlight_by_box(page, dict(x=x, y=y, width=1, height=1))
    page.mouse.move(x, y)
    page.mouse.down(button=button)
329
-
330
-
331
- # https://playwright.dev/python/docs/api/class-mouse#mouse-click
332
def mouse_click(x: float, y: float, button: Literal["left", "middle", "right"] = "left"):
    """
    Move the mouse to a location and click a mouse button. Dispatches mousemove,
    mousedown and mouseup events.

    Examples:
        mouse_click(887.2, 68)
        mouse_click(56, 712.56, 'right')
    """
    show_effects = demo_mode != "off"
    if show_effects:
        smooth_move_visual_cursor_to(page, x, y)
        # Highlight a 1x1 box at the target coordinates.
        highlight_by_box(page, dict(x=x, y=y, width=1, height=1))
    page.mouse.click(x, y, button=button)
345
-
346
-
347
- # https://playwright.dev/python/docs/api/class-mouse#mouse-dblclick
348
def mouse_dblclick(x: float, y: float, button: Literal["left", "middle", "right"] = "left"):
    """
    Move the mouse to a location and double click a mouse button. Dispatches
    mousemove, mousedown and mouseup events.

    Examples:
        mouse_dblclick(5, 236)
        mouse_dblclick(87.5, 354, 'right')
    """
    show_effects = demo_mode != "off"
    if show_effects:
        smooth_move_visual_cursor_to(page, x, y)
        # Highlight a 1x1 box at the target coordinates.
        highlight_by_box(page, dict(x=x, y=y, width=1, height=1))
    page.mouse.dblclick(x, y, button=button)
361
-
362
-
363
def mouse_drag_and_drop(from_x: float, from_y: float, to_x: float, to_y: float):
    """
    Drag and drop from a location to a location. Uses absolute client
    coordinates in pixels. Dispatches mousemove, mousedown and mouseup
    events.

    Examples:
        mouse_drag_and_drop(10.7, 325, 235.6, 24.54)
    """
    # Two stages with the same shape: (optional demo effects) -> move -> press/release.
    stages = (
        (from_x, from_y, False),  # press at the start point
        (to_x, to_y, True),  # release at the end point
    )
    for px, py, is_release in stages:
        if demo_mode != "off":
            smooth_move_visual_cursor_to(page, px, py)
            highlight_by_box(page, {"x": px, "y": py, "width": 1, "height": 1})
        page.mouse.move(px, py)
        if is_release:
            page.mouse.up()
        else:
            page.mouse.down()
384
-
385
-
386
- # https://playwright.dev/python/docs/api/class-keyboard#keyboard-press
387
def keyboard_press(key: str):
    """
    Press a combination of keys. Accepts the logical key names that are
    emitted in the keyboardEvent.key property of the keyboard events:
    Backquote, Minus, Equal, Backslash, Backspace, Tab, Delete, Escape,
    ArrowDown, End, Enter, Home, Insert, PageDown, PageUp, ArrowRight,
    ArrowUp, F1 - F12, Digit0 - Digit9, KeyA - KeyZ, etc. You can
    alternatively specify a single character you'd like to produce such
    as "a" or "#". Following modification shortcuts are also supported:
    Shift, Control, Alt, Meta, ShiftLeft, ControlOrMeta. ControlOrMeta
    resolves to Control on Windows and Linux and to Meta on macOS.

    Examples:
        keyboard_press('Backspace')
        keyboard_press('ControlOrMeta+a')
        keyboard_press('Meta+Shift+t')
        keyboard_press('PageDown')
    """
    # The last example used to be a raw `page.keyboard.press(...)` call, which
    # is not an invocation of this action.
    page.keyboard.press(key)
406
-
407
-
408
- # https://playwright.dev/python/docs/api/class-keyboard#keyboard-up
409
def keyboard_up(key: str):
    """
    Release a keyboard key. Dispatches a keyup event. Accepts the logical
    key names that are emitted in the keyboardEvent.key property of the
    keyboard events: Backquote, Minus, Equal, Backslash, Backspace, Tab,
    Delete, Escape, ArrowDown, End, Enter, Home, Insert, PageDown, PageUp,
    ArrowRight, ArrowUp, F1 - F12, Digit0 - Digit9, KeyA - KeyZ, etc.
    You can alternatively specify a single character you'd like to produce
    such as "a" or "#".

    Examples:
        keyboard_up('Shift')
        keyboard_up('c')
    """
    # Forwarded unchanged to the active page's keyboard.
    page.keyboard.up(key)
424
-
425
-
426
- # https://playwright.dev/python/docs/api/class-keyboard#keyboard-down
427
def keyboard_down(key: str):
    """
    Presses and holds a keyboard key. Dispatches a keydown event. Accepts the
    logical key names that are emitted in the keyboardEvent.key property of
    the keyboard events: Backquote, Minus, Equal, Backslash, Backspace, Tab,
    Delete, Escape, ArrowDown, End, Enter, Home, Insert, PageDown, PageUp,
    ArrowRight, ArrowUp, F1 - F12, Digit0 - Digit9, KeyA - KeyZ, etc. You can
    alternatively specify a single character such as "a" or "#".

    Examples:
        keyboard_down('Shift')
        keyboard_down('c')
    """
    # The examples used to be copied from keyboard_up and so showed the wrong action.
    page.keyboard.down(key)
441
-
442
-
443
- # https://playwright.dev/python/docs/api/class-keyboard#keyboard-type
444
def keyboard_type(text: str):
    """
    Types a string of text through the keyboard. Sends a keydown, keypress/input,
    and keyup event for each character in the text. Modifier keys DO NOT affect
    keyboard_type. Holding down Shift will not type the text in upper case.

    Examples:
        keyboard_type('Hello world!')
    """
    if demo_mode != "off" and text:
        # Spread the typing over ~2 seconds, at least 10 ms per key.
        delay = max(2000 / len(text), 10)
    else:
        # No artificial delay outside demo mode. Empty text also lands here,
        # which avoids dividing by len(text) == 0.
        delay = None
    page.keyboard.type(text, delay=delay)
458
-
459
-
460
- # https://playwright.dev/python/docs/api/class-keyboard#keyboard-insert-text
461
def keyboard_insert_text(text: str):
    """
    Insert a string of text in the currently focused element. Dispatches only input
    event, does not emit the keydown, keyup or keypress events. Modifier keys DO NOT
    affect keyboard_insert_text. Holding down Shift will not type the text in upper
    case.

    Examples:
        keyboard_insert_text('Hello world!')
    """
    # Forwarded unchanged to the active page's keyboard.
    page.keyboard.insert_text(text)
472
-
473
-
474
- # https://playwright.dev/python/docs/api/class-page#page-goto
475
def goto(url: str):
    """
    Navigate to a url.

    Examples:
        goto('http://www.example.com')
    """
    # Navigation happens on the currently active page.
    page.goto(url)
483
-
484
-
485
- # https://playwright.dev/python/docs/api/class-page#page-go-back
486
def go_back():
    """
    Navigate to the previous page in history.

    Examples:
        go_back()
    """
    # Navigation happens on the currently active page.
    page.go_back()
494
-
495
-
496
- # https://playwright.dev/python/docs/api/class-page#page-go-forward
497
def go_forward():
    """
    Navigate to the next page in history.

    Examples:
        go_forward()
    """
    # Navigation happens on the currently active page.
    page.go_forward()
505
-
506
-
507
- # https://playwright.dev/python/docs/api/class-browsercontext#browser-context-new-page
508
def new_tab():
    """
    Open a new tab. It will become the active one.

    Examples:
        new_tab()
    """
    global page
    # Rebind the module-level `page` to the page just created in the same context.
    browser_context = page.context
    page = browser_context.new_page()
    # trigger the callback that sets this page as active in browsergym
    page.evaluate(
        """\
const event = new Event('pageshow', {
    bubbles: true, // Whether the event bubbles up through the DOM or not
    cancelable: false // Whether the event can be canceled
});
window.dispatchEvent(event);
"""
    )
528
-
529
-
530
- # https://playwright.dev/python/docs/api/class-page#page-close
531
def tab_close():
    """
    Close the current tab.

    Examples:
        tab_close()
    """
    global page
    # Keep a handle on the context before the current page is closed.
    browser_context = page.context
    page.close()
    # The last page in the context becomes active; if none is left, open a new one.
    # TODO: do something more elaborate? (active page history)
    page = browser_context.pages[-1] if browser_context.pages else browser_context.new_page()
    # trigger the callback that sets this page as active in browsergym
    page.evaluate(
        """\
const event = new Event('pageshow', {
    bubbles: true, // Whether the event bubbles up through the DOM or not
    cancelable: false // Whether the event can be canceled
});
window.dispatchEvent(event);
"""
    )
557
-
558
-
559
- # https://playwright.dev/python/docs/api/class-page#page-bring-to-front
560
def tab_focus(index: int):
    """
    Bring tab to front (activate tab).

    Examples:
        tab_focus(2)
    """
    global page  # set the focused page as the active page
    # `index` is a position in the current context's list of pages.
    open_pages = page.context.pages
    page = open_pages[index]
    page.bring_to_front()
    # trigger the callback that sets this page as active in browsergym
    page.evaluate(
        """\
const event = new Event('pageshow', {
    bubbles: true, // Whether the event bubbles up through the DOM or not
    cancelable: false // Whether the event can be canceled
});
window.dispatchEvent(event);
"""
    )
580
-
581
-
582
- # https://playwright.dev/python/docs/input#upload-files
583
def upload_file(bid: str, file: str | list[str]):
    """
    Click an element and wait for a "filechooser" event, then select one
    or multiple input files for upload. Relative file paths are resolved
    relative to the current working directory. An empty list clears the
    selected files.

    Examples:
        upload_file("572", "my_receipt.pdf")
        upload_file("63", ["/home/bob/Documents/image.jpg", "/home/bob/Documents/file.zip"])
    """
    target = get_elem_by_bid(page, bid, demo_mode != "off")
    add_demo_mode_effects(page, target, bid, demo_mode=demo_mode, move_cursor=True)

    # The click happens inside the expectation so the file chooser it opens is captured.
    with page.expect_file_chooser() as chooser_info:
        target.click(timeout=500)

    chooser_info.value.set_files(file)
602
-
603
-
604
- # https://playwright.dev/python/docs/input#upload-files
605
def mouse_upload_file(x: float, y: float, file: str | list[str]):
    """
    Click a location and wait for a "filechooser" event, then select one
    or multiple input files for upload. Relative file paths are resolved
    relative to the current working directory. An empty list clears the
    selected files.

    Examples:
        mouse_upload_file(132.1, 547, "my_receipt.pdf")
        mouse_upload_file(328, 812, ["/home/bob/Documents/image.jpg", "/home/bob/Documents/file.zip"])
    """
    show_effects = demo_mode != "off"
    if show_effects:
        smooth_move_visual_cursor_to(page, x, y)
        # Highlight a 1x1 box at the target coordinates.
        highlight_by_box(page, dict(x=x, y=y, width=1, height=1))

    # The click happens inside the expectation so the file chooser it opens is captured.
    with page.expect_file_chooser() as chooser_info:
        page.mouse.click(x, y)

    chooser_info.value.set_files(file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/core/src/browsergym/core/action/highlevel.py DELETED
@@ -1,522 +0,0 @@
1
- import inspect
2
- import random
3
- import typing
4
- from dataclasses import dataclass
5
-
6
- from . import utils
7
- from .base import AbstractActionSet
8
- from .functions import ( # check,; uncheck,
9
- clear,
10
- click,
11
- dblclick,
12
- drag_and_drop,
13
- fill,
14
- focus,
15
- go_back,
16
- go_forward,
17
- goto,
18
- hover,
19
- keyboard_down,
20
- keyboard_insert_text,
21
- keyboard_press,
22
- keyboard_type,
23
- keyboard_up,
24
- mouse_click,
25
- mouse_dblclick,
26
- mouse_down,
27
- mouse_drag_and_drop,
28
- mouse_move,
29
- mouse_up,
30
- mouse_upload_file,
31
- new_tab,
32
- noop,
33
- press,
34
- report_infeasible,
35
- scroll,
36
- select_option,
37
- send_msg_to_user,
38
- tab_close,
39
- tab_focus,
40
- upload_file,
41
- )
42
- from .parsers import action_docstring_parser, highlevel_action_parser
43
-
44
# Named groups of action functions, keyed by subset name.
# The order of the entries inside each list is kept as-is.
ACTION_SUBSETS = {
    "chat": [send_msg_to_user],
    "infeas": [report_infeasible],
    "bid": [
        scroll,
        fill,
        # check / uncheck are left out on purpose: they are not really needed
        # and might pollute the action space, doing more harm than good.
        # check,
        # uncheck,
        select_option,
        click,
        dblclick,
        hover,
        press,
        focus,
        clear,
        drag_and_drop,
        upload_file,
    ],
    "coord": [
        scroll,
        mouse_move,
        mouse_up,
        mouse_down,
        mouse_click,
        mouse_dblclick,
        mouse_drag_and_drop,
        mouse_upload_file,
        keyboard_down,
        keyboard_up,
        keyboard_press,
        keyboard_type,
        keyboard_insert_text,
    ],
    "nav": [go_back, go_forward, goto],
    "tab": [
        tab_close,
        tab_focus,
        new_tab,
    ],
    # adapted from MiniWoB repo
    # https://github.com/Farama-Foundation/miniwob-plusplus/blob/1bab0dffe34e92cc1049fe9443542029bf7e44a9/miniwob/action.py#L122
    "miniwob_all": [
        mouse_move,  # MOVE_COORDS
        mouse_click,  # CLICK_COORDS
        mouse_dblclick,  # DBLCLICK_COORDS
        mouse_down,  # MOUSEDOWN_COORDS
        mouse_up,  # MOUSEUP_COORDS
        scroll,  # SCROLL_UP_COORDS, SCROLL_DOWN_COORDS
        click,  # CLICK_ELEMENT
        keyboard_press,  # PRESS_KEY
        keyboard_type,  # TYPE_TEXT (and substitute for TYPE_FIELD)
        fill,  # FOCUS_ELEMENT_AND_TYPE_TEXT (and substitute for FOCUS_ELEMENT_AND_TYPE_FIELD)
    ],
    # adapted from MiniWoB repo
    # https://github.com/Farama-Foundation/miniwob-plusplus/blob/1bab0dffe34e92cc1049fe9443542029bf7e44a9/miniwob/action.py#L142
    "miniwob_shi17": [
        mouse_click,  # CLICK_COORDS
        mouse_dblclick,  # DBLCLICK_COORDS
        mouse_down,  # MOUSEDOWN_COORDS
        mouse_up,  # MOUSEUP_COORDS
        scroll,  # SCROLL_UP_COORDS, SCROLL_DOWN_COORDS
        keyboard_press,  # PRESS_KEY
    ],
    # adapted from MiniWoB repo
    # https://github.com/Farama-Foundation/miniwob-plusplus/blob/1bab0dffe34e92cc1049fe9443542029bf7e44a9/miniwob/action.py#L160
    "miniwob_liu18": [
        click,  # CLICK_ELEMENT
        fill,  # substitute for FOCUS_ELEMENT_AND_TYPE_FIELD
    ],
    # adapted from MiniWoB repo
    # https://github.com/Farama-Foundation/miniwob-plusplus/blob/1bab0dffe34e92cc1049fe9443542029bf7e44a9/miniwob/action.py#L173
    "miniwob_humphreys22": [
        mouse_move,  # MOVE_COORDS
        mouse_click,  # CLICK_COORDS
        mouse_dblclick,  # DBLCLICK_COORDS
        mouse_down,  # MOUSEDOWN_COORDS
        mouse_up,  # MOUSEUP_COORDS
        scroll,  # SCROLL_UP_COORDS, SCROLL_DOWN_COORDS
        keyboard_press,  # PRESS_KEY
        keyboard_type,  # substitute for TYPE_FIELD
    ],
    # from the webarena paper
    # https://arxiv.org/abs/2307.13854
    # from the webarena source code
    # https://github.com/web-arena-x/webarena/blob/e31c190c9b43f63e5724322b847e00249300df40/browser_env/actions.py#L240
    # from the webarena default prompt
    # https://github.com/web-arena-x/webarena/blob/e31c190c9b43f63e5724322b847e00249300df40/agent/prompts/raw/p_cot_id_actree_2s.py#L13
    "webarena": [
        # columns: code | paper | prompt
        scroll,  # SCROLL | scroll(dir) | scroll [down|up]
        keyboard_press,  # KEY_PRESS | press(key_comb) | press [key_comb]
        # MOUSE_CLICK | |
        # KEYBOARD_TYPE | |
        # MOUSE_HOVER | |
        click,  # CLICK | click(elem) | click [id]
        fill,  # TYPE | type(elem, text) | type [id] [content]
        hover,  # HOVER | hover(elem) | hover [id]
        tab_focus,  # PAGE_FOCUS | tab_focus(index) | tab_focus [tab_index]
        new_tab,  # NEW_TAB | new_tab() | new_tab
        go_back,  # GO_BACK | go_back() | go_back
        go_forward,  # GO_FORWARD | go_forward() | go_forward
        goto,  # GOTO_URL | goto(url) | goto [url]
        tab_close,  # PAGE_CLOSE | tab_close() | close_tab
        # CHECK | |
        select_option,  # SELECT_OPTION | |
        send_msg_to_user,  # STOP | stop(answer) | stop [answer]
        report_infeasible,  # explicit unachievable action, equivalent to STOP "N/A"
    ],
    # from the visualwebarena paper
    # https://arxiv.org/abs/2401.13649
    # from the visualwebarena source code
    # https://github.com/web-arena-x/visualwebarena/blob/15890922c97a8694e366fde2d7de8dbd1ff63fb5/browser_env/actions.py#L311-L343
    # from the visualwebarena default prompt
    # https://github.com/web-arena-x/visualwebarena/blob/15890922c97a8694e366fde2d7de8dbd1ff63fb5/agent/prompts/jsons/p_cot_id_actree_3s.json#L2
    "visualwebarena": [
        # columns: code | paper | prompt
        scroll,  # SCROLL | scroll(dir) | scroll [down|up]
        keyboard_press,  # KEY_PRESS | press(key_comb) | press [key_comb]
        # MOUSE_CLICK | |
        # KEYBOARD_TYPE | |
        # MOUSE_HOVER | |
        click,  # CLICK | click(elem) | click [id]
        fill,  # TYPE | type(elem, text) | type [id] [content]
        hover,  # HOVER | hover(elem) | hover [id]
        tab_focus,  # PAGE_FOCUS | tab_focus(index) | tab_focus [tab_index]
        new_tab,  # NEW_TAB | new_tab() | new_tab
        go_back,  # GO_BACK | go_back() | go_back
        go_forward,  # GO_FORWARD | go_forward() | go_forward
        goto,  # GOTO_URL | goto(url) | goto [url]
        tab_close,  # PAGE_CLOSE | tab_close() | close_tab
        # CHECK | |
        select_option,  # SELECT_OPTION | |
        send_msg_to_user,  # STOP | stop(answer) | stop [answer]
        # CLEAR | |
        upload_file,  # UPLOAD | |
        report_infeasible,  # explicit unachievable action, equivalent to STOP "N/A"
    ],
    # from workarena paper
    # https://arxiv.org/abs/2403.07718
    "workarena": [
        scroll,
        fill,
        select_option,
        click,
        dblclick,
        hover,
        press,
        focus,
        clear,
        drag_and_drop,
        send_msg_to_user,
    ],
    # from workarena++ paper
    # https://arxiv.org/abs/2407.05291
    "workarena++": [
        scroll,
        fill,
        select_option,
        click,
        dblclick,
        hover,
        press,
        focus,
        clear,
        drag_and_drop,
        tab_focus,
        new_tab,
        tab_close,
        go_back,
        go_forward,
        goto,
        send_msg_to_user,
        report_infeasible,
    ],
    # from weblinx_browsergym
    # https://github.com/McGill-NLP/agentlab-weblinx-mvp/blob/a91b6d19870c5187d252e70a2e2013511cc6f1d2/weblinx_browsergym/__init__.py#L274-L286
    "weblinx": [
        send_msg_to_user,  # say(speaker="assistant", utterance=[str]) -> send_msg_to_user(text=[str])
        click,  # click(uid=[element id]) -> click(bid=[element id])
        hover,  # hover(uid=[element id]) -> hover(bid=[element id])
        fill,  # textinput(uid=[element id], value=[str]) -> fill(bid=[element id], value=[str])
        # change(uid=[element], value=[str]) -> ❌
        goto,  # load(url=[link]) -> goto(url=[link])
        # submit(uid=[element]) -> click(bid=[element id])
        scroll,  # scroll(x=[int x],y=[int y]) -> scroll(delta_x=[int x], delta_y=[int y])
        # copy(uid=[element],text=[str]) -> ❌
        # paste(uid=[element],text=[str]) -> ❌
        new_tab,  # tabcreate() -> new_tab()
        tab_close,  # tabremove(target=[tabId]) -> tab_close()
        tab_focus,  # tabswitch(origin=[origin tabId],target=[target tabId]) -> tab_focus(index=[target tabid])
    ],
    # from assistantbench paper
    # https://arxiv.org/abs/2407.15711
    "assistantbench": [
        scroll,  # SCROLL
        fill,  # TYPE
        select_option,  # SELECT
        click,  # CLICK
        press,  # PRESS ENTER
        go_back,  # GOBACK
        goto,  # GOTO, SEARCH
        send_msg_to_user,  # TERMINATE
    ],
}
249
-
250
-
251
# Plain record with three text fields describing one high-level action.
@dataclass
class HighLevelAction:
    # entrypoint: callable
    signature: str  # signature text of the action
    description: str  # description text of the action
    examples: list[str]  # example strings for the action
257
-
258
-
259
- class HighLevelActionSet(AbstractActionSet):
260
-
261
- # static class variables
262
- ActionSubset = typing.Literal[
263
- "chat",
264
- "infeas",
265
- "bid",
266
- "coord",
267
- "nav",
268
- "tab",
269
- "miniwob_all",
270
- "miniwob_shi17",
271
- "miniwob_liu18",
272
- "miniwob_humphreys22",
273
- "webarena",
274
- "visualwebarena",
275
- "workarena",
276
- "workarena++",
277
- "weblinx",
278
- "assistantbench",
279
- "custom",
280
- ]
281
- DemoMode = typing.Literal["off", "default", "all_blue", "only_visible_elements"]
282
-
283
- def __init__(
284
- self,
285
- subsets: typing.Optional[ActionSubset | list[ActionSubset]] = [
286
- "chat",
287
- "infeas",
288
- "bid",
289
- "nav",
290
- "tab",
291
- ],
292
- custom_actions: typing.Optional[list[callable]] = None,
293
- multiaction: bool = True,
294
- demo_mode: typing.Optional[DemoMode] = None,
295
- strict: bool = False,
296
- retry_with_force: bool = False,
297
- ):
298
- super().__init__(strict)
299
- self.multiaction = multiaction
300
- self.demo_mode = demo_mode
301
- self.retry_with_force = retry_with_force
302
-
303
- if not subsets:
304
- raise ValueError(f"'action_subsets' is empty.")
305
-
306
- if isinstance(subsets, str):
307
- subsets = [subsets]
308
-
309
- allowed_actions = [noop] # the noop action is always allowed
310
-
311
- # add actions from specified action sets
312
- if subsets:
313
- for subset in subsets:
314
- if subset in ACTION_SUBSETS:
315
- allowed_actions.extend(ACTION_SUBSETS[subset])
316
- elif subset == "custom":
317
- if not custom_actions:
318
- raise ValueError(
319
- "'custom' is in 'action_subsets' but 'custom_actions' is empty."
320
- )
321
- allowed_actions.extend(custom_actions)
322
- else:
323
- raise ValueError(f"Unknown high-level action subspace: {subset}")
324
-
325
- # like set() but preserves order
326
- # https://stackoverflow.com/questions/1653970/does-python-have-an-ordered-set
327
- allowed_actions = list(dict.fromkeys(allowed_actions).keys())
328
-
329
- # parse the actions and build the action space
330
- self.action_set: dict[str, HighLevelAction] = {}
331
- self.python_includes = ""
332
-
333
- # include playwright imports
334
- self.python_includes += f"""\
335
- import playwright.sync_api
336
- from typing import Literal
337
-
338
-
339
- """
340
- # set demo_mode and retry_with_force flags
341
- self.python_includes += f"""\
342
- demo_mode={repr(demo_mode)}
343
- retry_with_force={repr(retry_with_force)}
344
-
345
- if demo_mode is None:
346
- demo_mode = "default" if DEMO_MODE else "off"
347
-
348
- """
349
-
350
- # include utility functions
351
- for _, func in inspect.getmembers(utils, inspect.isfunction):
352
- self.python_includes += f"""\
353
- {inspect.getsource(func)}
354
-
355
-
356
- """
357
-
358
- # parse and include action functions
359
- for func in allowed_actions:
360
-
361
- # include action function definition in the code
362
- self.python_includes += f"""\
363
- {inspect.getsource(func)}
364
-
365
-
366
- """
367
-
368
- # extract action signature
369
- signature = f"{func.__name__}{inspect.signature(func)}"
370
-
371
- # parse docstring
372
- description, examples = action_docstring_parser.parse_string(func.__doc__)
373
-
374
- # reconstruct action description
375
- description = " ".join(description)
376
-
377
- # reconstruct action examples
378
- examples = [
379
- function_name + "(" + ", ".join([repr(arg) for arg in function_args]) + ")"
380
- for function_name, function_args in examples
381
- ]
382
-
383
- if func.__name__ in self.action_set:
384
- raise ValueError(f"Duplicated action '{func.__name__}'")
385
-
386
- self.action_set[func.__name__] = HighLevelAction(
387
- # entrypoint=func,
388
- signature=signature,
389
- description=description,
390
- examples=examples,
391
- )
392
-
393
- def example_action(self, abstract: bool, max_examples: int = 3) -> str:
394
- """
395
- Returns an example action as a string.
396
- """
397
- if abstract:
398
- if self.multiaction:
399
- return """\
400
- One or several actions, separated by new lines."""
401
- else:
402
- return """\
403
- One single action to be executed. You can only use one action at a time."""
404
- else:
405
- picked_examples = []
406
-
407
- # use fill and click examples if action is present
408
- for action_name in ["fill", "click", "mouse_click", "keyboard_type"]:
409
- if action_name in self.action_set:
410
- picked_examples.extend(self.action_set[action_name].examples)
411
-
412
- # last resort, use all action examples
413
- if not picked_examples:
414
- for _, action in self.action_set.items():
415
- picked_examples += action.examples
416
-
417
- # shuffle examples
418
- rng = random.Random(1)
419
- rng.shuffle(picked_examples)
420
-
421
- if self.multiaction:
422
- return "\n".join(picked_examples[:max_examples])
423
- else:
424
- return picked_examples[0]
425
-
426
- def describe(self, with_long_description: bool = True, with_examples: bool = True):
427
- """
428
- Returns a textual description of this action space.
429
- """
430
- description = f"""
431
- {len(self.action_set)} different types of actions are available.
432
-
433
- """
434
- for _, action in self.action_set.items():
435
- description += f"""\
436
- {action.signature}
437
- """
438
-
439
- if with_long_description:
440
- description += f"""\
441
- Description: {action.description}
442
- """
443
- if with_examples and action.examples:
444
- description += f"""\
445
- Examples:
446
- """
447
- for example in action.examples:
448
- description += f"""\
449
- {example}
450
-
451
- """
452
-
453
- if self.multiaction:
454
- description += f"""\
455
- Multiple actions can be provided at once, but will be executed sequentially without any feedback from the page.
456
- More than 2-3 actions usually leads to failure or unexpected behavior."""
457
- else:
458
- description += f"""\
459
- Only a single action can be provided at once."""
460
-
461
- example_action = self.example_action(abstract=False)
462
- if example_action:
463
- description += f""" Example:
464
- {example_action}
465
- """
466
- else:
467
- description += f"""\
468
-
469
- """
470
-
471
- return description
472
-
473
- def to_python_code(self, action):
474
- """
475
- Converts the given high-level action string to browsergym-compatible python code.
476
-
477
- Args:
478
- action: the high-level action to parse.
479
-
480
- Returns:
481
- Executable python code that performs the action in a browsergym environment.
482
- """
483
- highlevel_code = action
484
-
485
- # do the actual parsing and convert each high-level action to
486
- # the corresponding python function call
487
- if self.strict:
488
- function_calls = highlevel_action_parser.parse_string(highlevel_code, parse_all=True)
489
- function_calls = function_calls.as_list()
490
- else:
491
- function_calls = highlevel_action_parser.search_string(
492
- highlevel_code
493
- ) # allow for multiple matches, skip anything in-between
494
- function_calls = sum(function_calls.as_list(), []) # unpack multiple matches
495
-
496
- if not function_calls:
497
- raise ValueError("Received an empty action.")
498
- elif len(function_calls) > 1 and not self.multiaction:
499
- raise ValueError("Received a multi-action, only single-actions are allowed.")
500
-
501
- python_code = ""
502
-
503
- # function definitions
504
- python_code += self.python_includes
505
-
506
- # function calls
507
- for function_name, function_args in function_calls:
508
- if function_name not in self.action_set:
509
- raise NameError(f"Invalid action type '{function_name}'.")
510
- python_code += (
511
- function_name + "(" + ", ".join([repr(arg) for arg in function_args]) + ")\n"
512
- )
513
-
514
- # return the constructed python code
515
- return python_code
516
-
517
-
518
- # consistency checks
519
- assert "custom" not in ACTION_SUBSETS
520
- assert set(typing.get_args(HighLevelActionSet.ActionSubset)) == set(
521
- list(ACTION_SUBSETS.keys()) + ["custom"]
522
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/core/src/browsergym/core/action/parsers.py DELETED
@@ -1,92 +0,0 @@
1
- import ast
2
- import pyparsing as pp
3
-
4
- from dataclasses import dataclass
5
- from typing import Any
6
-
7
-
8
- @dataclass
9
- class NamedArgument:
10
- name: str
11
- value: Any
12
-
13
- def __repr__(self):
14
- return f"{self.name}={repr(self.value)}"
15
-
16
-
17
- def _build_highlevel_action_parser() -> pp.ParserElement:
18
- """
19
- Returns:
20
- An action parser that accepts Python-like function calls with string, number, list or dict literals as arguments.
21
- Example:
22
- func("a", 42, None, True, [2, 4, "s"], {"a_key": "a_value"}, )
23
- The parser is loose and accepts multi-line or single-line combinations af calls.
24
- Example:
25
- func() func()
26
- \tfunc()
27
- Python comments are ignored.
28
- Example:
29
- # this is a comment
30
- func() # this function call will be parsed
31
- # func() # this one will not
32
- The parser will return a list of (function_name, function_args) tuples, one for each function call in the input.
33
- The parser will raise exceptions
34
-
35
- """
36
-
37
- def make_keyword(kwd_str, kwd_value):
38
- return pp.Keyword(kwd_str).set_parse_action(pp.replace_with(kwd_value))
39
-
40
- TRUE = make_keyword("True", True)
41
- FALSE = make_keyword("False", False)
42
- NONE = make_keyword("None", None)
43
-
44
- LBRACK, RBRACK, LBRACE, RBRACE, LPAREN, RPAREN, COLON = map(pp.Suppress, "[]{}():")
45
-
46
- def literal_eval(toks):
47
- return ast.literal_eval(toks[0])
48
-
49
- string = pp.python_quoted_string().set_parse_action(literal_eval)
50
- number = pp.pyparsing_common.number()
51
- dict = pp.Forward().set_name("dict") # will be defined later
52
- list = pp.Forward().set_name("list") # will be defined later
53
- _tuple = pp.Forward().set_name("tuple") # will be defined later
54
- element = (string | number | dict | list | _tuple | TRUE | FALSE | NONE).set_name("element")
55
-
56
- list_items = pp.DelimitedList(element, allow_trailing_delim=True).set_name(None)
57
- list << pp.Group(LBRACK + pp.Optional(list_items) + RBRACK, aslist=True)
58
- _tuple << pp.Group(LPAREN + pp.Optional(list_items) + RPAREN, aslist=True).set_parse_action(
59
- lambda tokens: tuple(tokens[0])
60
- )
61
-
62
- dict_item = pp.Group(string + COLON + element, aslist=True).set_name("dict item")
63
- dict_items = pp.DelimitedList(dict_item, allow_trailing_delim=True).set_name(None)
64
- dict << pp.Dict(LBRACE + pp.Optional(dict_items) + RBRACE, asdict=True)
65
-
66
- arg = element
67
- list_args = pp.DelimitedList(arg, allow_trailing_delim=True).set_name(None)
68
- named_arg = (pp.pyparsing_common.identifier() + pp.Literal("=") + element).set_parse_action(
69
- lambda tokens: NamedArgument(name=tokens[0], value=tokens[2])
70
- )
71
- list_named_args = pp.DelimitedList(named_arg, allow_trailing_delim=True).set_name(None)
72
- function_call = pp.pyparsing_common.identifier() + pp.Group(
73
- LPAREN + pp.Optional(list_args) + pp.Optional(list_named_args) + RPAREN, aslist=True
74
- )
75
-
76
- multiple_function_calls = pp.DelimitedList(pp.Group(function_call), delim="")
77
- multiple_function_calls.ignore(pp.python_style_comment())
78
-
79
- parser = multiple_function_calls
80
-
81
- return parser
82
-
83
-
84
- # this one will be used to extract python-like function calls
85
- highlevel_action_parser: pp.ParserElement = _build_highlevel_action_parser()
86
-
87
- # this one will be used to process the docstring in high-level actions, in order to describe the action space
88
- action_docstring_parser: pp.ParserElement = (
89
- pp.Group(pp.OneOrMore(pp.Word(pp.printables), stop_on=pp.Literal("Examples:")))
90
- + pp.Literal("Examples:").suppress()
91
- + pp.Group(highlevel_action_parser)
92
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/core/src/browsergym/core/action/python.py DELETED
@@ -1,112 +0,0 @@
1
- import re
2
-
3
- from .base import AbstractActionSet
4
-
5
-
6
- class PythonActionSet(AbstractActionSet):
7
- def describe(self, with_long_description: bool = True, with_examples: bool = True):
8
- """
9
- Returns a textual description of this action space.
10
- """
11
- description = f"""
12
- Each action consists of executable Python code (python>=3.10) that uses the Playwright library (playwright==1.32)
13
- to interact with the current webpage and the browser context. The currently active webpage is accessible via the
14
- global variable `page`. A function `send_message_to_user(text)` is also accessible and can be used to send a
15
- message to the user, as well as a function `report_infeasible_instructions(reason)` to notify the user when their
16
- instructions are infeasible."""
17
- if with_long_description:
18
- description += f"""
19
- The browser context is in `page.context`, and all open webpages (tabs and popups)
20
- are in `page.context.pages`. Here is is an example of a valid action:
21
- ```
22
- frame = page.frame_locator(".result-frame")
23
- button = frame.get_by_text("Submit")
24
- button.click()
25
- ```
26
- Here is another example:
27
- ```
28
- frame = page.get_by_test_id("a").frame_locator(":scope")
29
- frame.get_by_test_id("a776").click()
30
- ```
31
- Note that Playwright's `get_by_test_id()` method is configured to use the `bid` attribute to locate HTML elements,
32
- instead of the default `data-testid`. Also, Playwright's locators can not traverse iframes, so you have to locate
33
- parent iframes first in order to locate an element in an iframe. The `bid` attribute contains all the information
34
- required to recursively locate an element. For example, an element with `bid="ac2"` can be retrieved as follows:
35
- ```
36
- frame = page.get_by_test_id("a").frame_locator(":scope")
37
- frame = frame.get_by_test_id("ac").frame_locator(":scope")
38
- elem = frame.get_by_test_id("ac2")
39
- ```
40
- """
41
- else:
42
- description += f"""\
43
-
44
- """
45
- if with_examples:
46
- description += f"""\
47
- Here are other examples of valid actions:
48
- ```
49
- page = page.context.new_page()
50
- page.goto("https://www.wikipedia.org/")
51
- ```
52
- ```
53
- page.get_by_label("Birth date").fill("2020-02-02")
54
- page.get_by_role("link", name="Get started").click()
55
- ```
56
- ```
57
- page.get_by_label('I agree to the terms above').check()
58
- ```
59
- ```
60
- page.locator('#area').fill('Hello World!')
61
- ```
62
- ```
63
- page.get_by_role("textbox").press("Control+ArrowRight")
64
- ```
65
- ```
66
- send_message_to_user("There are 7 items to choose from.")
67
- ```
68
- ```
69
- report_infeasible_instructions("I cannot follow these instructions because there is no email field in this form.")
70
- ```
71
- """
72
-
73
- return description
74
-
75
- def example_action(self, abstract: bool) -> str:
76
- """
77
- Returns an example action as a string.
78
- """
79
- if abstract:
80
- return """\
81
- One single bloc of Python code. Do not include any explanation, only valid Python code."""
82
- else:
83
- return """\
84
- frame = page.get_by_test_id("b").frame_locator(":scope")
85
- frame = page.get_by_test_id("ba").frame_locator(":scope")
86
- frame.get_by_test_id("ba2").fill("Hello world!")
87
- frame.get_by_test_id("ba3").click()
88
- """
89
-
90
- def to_python_code(self, action):
91
- """
92
- Converts the given code action string to browsergym-compatible playwright code.
93
-
94
- Args:
95
- action: the code action to parse.
96
-
97
- Returns:
98
- Executable playwright code that performs the action in a browsergym environment.
99
- """
100
-
101
- python_code = ""
102
-
103
- # extract markdown-style code snippets if detected
104
- pattern = re.compile(r"```(?:python)?\n(?P<code>[\s\S]*?)```")
105
- if pattern.match(action):
106
- python_code += "\n".join([match.group("code") for match in pattern.finditer(action)])
107
- # otherwise just use the code action as is
108
- else:
109
- python_code += action
110
-
111
- # return the produced playwright code
112
- return python_code
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/core/src/browsergym/core/action/utils.py DELETED
@@ -1,288 +0,0 @@
1
- from typing import Literal
2
-
3
- import playwright.sync_api
4
-
5
-
6
- def get_elem_by_bid(
7
- page: playwright.sync_api.Page, bid: str, scroll_into_view: bool = False
8
- ) -> playwright.sync_api.Locator:
9
- """
10
- Parse the given bid to sequentially locate every nested frame leading to the bid, then
11
- locate the bid element. Bids are expected to take the form "abDb123", which means
12
- the element abDb123 is located inside frame abDAb, which is located inside frame abDA,
13
- which is located inside frame a, which is located inside the page's main frame.
14
-
15
- Args:
16
- bid: the browsergym id (playwright testid) of the page element.
17
- scroll_into_view: try to scroll element into view, unless it is completely visible.
18
-
19
- Returns:
20
- Playwright element.
21
- Bounding box of the element.
22
- """
23
- if not isinstance(bid, str):
24
- raise ValueError(f"expected a string, got {repr(bid)}")
25
-
26
- current_frame = page
27
-
28
- # dive into each nested frame, to the frame where the element is located
29
- i = 0
30
- while bid[i:] and not bid[i:].isnumeric():
31
- i += 1
32
- # allow multi-character frame ids such as aA, bCD etc.
33
- while bid[i:] and bid[i].isalpha() and bid[i].isupper():
34
- i += 1
35
- frame_bid = bid[:i] # bid of the next frame to select
36
- frame_elem = current_frame.get_by_test_id(frame_bid)
37
- if not frame_elem.count():
38
- raise ValueError(f'Could not find element with bid "{bid}"')
39
- if scroll_into_view:
40
- frame_elem.scroll_into_view_if_needed(timeout=500)
41
- current_frame = frame_elem.frame_locator(":scope")
42
-
43
- # finally, we should have selected the frame where the target element is
44
- elem = current_frame.get_by_test_id(bid)
45
- if not elem.count():
46
- raise ValueError(f'Could not find element with bid "{bid}"')
47
- if scroll_into_view:
48
- elem.scroll_into_view_if_needed(timeout=500)
49
- return elem
50
-
51
-
52
- def highlight_by_box(
53
- page: playwright.sync_api.Page, box: dict, color: Literal["blue", "red"] = "blue"
54
- ):
55
- """Highlights the target element based on its bounding box attributes."""
56
-
57
- assert color in ("blue", "red")
58
-
59
- if box:
60
- left, top, width, height = box["x"], box["y"], box["width"], box["height"]
61
- page.evaluate(
62
- f"""\
63
- const overlay = document.createElement('div');
64
- document.body.appendChild(overlay);
65
- overlay.setAttribute('style', `
66
- all: initial;
67
- position: fixed;
68
- border: 2px solid transparent; /* Start with transparent border */
69
- borderRadius: 10px; /* Add rounded corners */
70
- boxShadow: 0 0 0px {color}; /* Initial boxShadow with 0px spread */
71
- left: {left - 2}px; /* Adjust left position to accommodate initial shadow spread */
72
- top: {top - 2}px; /* Adjust top position likewise */
73
- width: {width}px;
74
- height: {height}px;
75
- z-index: 2147483646; /* Maximum value - 1 */
76
- pointerEvents: none; /* Ensure the overlay does not interfere with user interaction */
77
- `);
78
-
79
- // Animate the boxShadow to create a "wave" effect
80
- let spread = 0; // Initial spread radius of the boxShadow
81
- const waveInterval = setInterval(() => {{
82
- spread += 10; // Increase the spread radius to simulate the wave moving outward
83
- overlay.style.boxShadow = `0 0 40px ${{spread}}px {color}`; // Update boxShadow to new spread radius
84
- overlay.style.opacity = 1 - spread / 38; // Gradually decrease opacity to fade out the wave
85
- if (spread >= 38) {{ // Assuming 76px ~ 2cm spread radius
86
- clearInterval(waveInterval); // Stop the animation once the spread radius reaches 2cm
87
- document.body.removeChild(overlay); // Remove the overlay from the document
88
- }}
89
- }}, 200); // Adjust the interval as needed to control the speed of the wave animation
90
- """
91
- )
92
- # Wait a bit to let users see the highlight
93
- page.wait_for_timeout(1000) # Adjust delay as needed
94
-
95
-
96
- def smooth_move_visual_cursor_to(
97
- page: playwright.sync_api.Page, x: float, y: float, speed: float = 400
98
- ):
99
- """
100
- Smoothly moves the visual cursor to a specific point, with constant
101
- movement speed.
102
-
103
- Args:
104
- x: target location X coordinate (in viewport pixels)
105
- y: target location Y coordinate (in viewport pixels)
106
- speed: cursor speed (in pixels per second)
107
- """
108
- movement_time = page.evaluate(
109
- """\
110
- ([targetX, targetY, speed]) => {
111
-
112
- // create cursor if needed
113
- if (!("browsergym_visual_cursor" in window)) {
114
- if (window.trustedTypes && window.trustedTypes.createPolicy) {
115
- window.trustedTypes.createPolicy('default', {
116
- createHTML: (string, sink) => string
117
- });
118
- }
119
- let cursor = document.createElement('div');
120
- cursor.setAttribute('id', 'browsergym-visual-cursor');
121
- cursor.innerHTML = `
122
- <svg width="50px" height="50px" viewBox="213 106 713 706" fill="none" xmlns="http://www.w3.org/2000/svg">
123
- <path d="M213.333 106.667L426.667 853.333 512 512 853.333 426.667 213.333 106.667z" fill="blue"/>
124
- </svg>
125
- `;
126
- cursor.setAttribute('style', `
127
- all: initial;
128
- position: fixed;
129
- opacity: 0.7; /* Slightly transparent */
130
- z-index: 2147483647; /* Maximum value */
131
- pointer-events: none; /* Ensures the SVG doesn't interfere with page interactions */
132
- `);
133
-
134
- // Calculate center position within the viewport
135
- const centerX = window.innerWidth / 2;
136
- const centerY = window.innerHeight / 2;
137
-
138
- cursor.style.left = `${centerX}px`;
139
- cursor.style.top = `${centerY}px`;
140
-
141
- // save cursor element
142
- window.browsergym_visual_cursor = cursor;
143
- window.browsergym_visual_cursor_n_owners = 0;
144
- }
145
-
146
- // recover cursor
147
- let cursor = window.browsergym_visual_cursor;
148
-
149
- // attach cursor to document
150
- document.body.appendChild(cursor);
151
- window.browsergym_visual_cursor_n_owners += 1;
152
-
153
- x = parseFloat(cursor.style.left);
154
- y = parseFloat(cursor.style.top);
155
-
156
- dx = targetX - x;
157
- dy = targetY - y;
158
- dist = Math.hypot(dx, dy);
159
- movement_time = (dist / speed) * 1000; // seconds to milliseconds
160
- still_wait_time = 1000;
161
-
162
- // Adjust steps based on distance to keep movement speed consistent
163
- // 1 step per 10 pixels of distance, adjust as needed
164
- steps = Math.max(1, Math.trunc(dist / 10));
165
-
166
- step_dx = dx / steps;
167
- step_dy = dy / steps;
168
- step_dist = dist / steps;
169
- step_wait_time = Math.max(10, movement_time / steps);
170
-
171
- let step = 0;
172
- let time_still = 0;
173
- const cursorInterval = setInterval(() => {
174
- // move cursor
175
- if (step < steps) {
176
- x += step_dx;
177
- y += step_dy;
178
- cursor.style.left = `${x}px`;
179
- cursor.style.top = `${y}px`;
180
- }
181
- // still cursor (wait a bit)
182
- else if (time_still < still_wait_time) {
183
- time_still += step_wait_time;
184
- }
185
- // stop and detach cursor
186
- else {
187
- clearInterval(cursorInterval);
188
- window.browsergym_visual_cursor_n_owners -= 1;
189
- if (window.browsergym_visual_cursor_n_owners <= 0) {
190
- document.body.removeChild(cursor);
191
-
192
- }
193
- }
194
- step += 1;
195
- }, step_wait_time);
196
-
197
- return movement_time;
198
- }""",
199
- [x, y, speed],
200
- )
201
- page.wait_for_timeout(movement_time)
202
-
203
-
204
- def check_for_overlay(
205
- page: playwright.sync_api.Page, bid: str, element: playwright.sync_api.ElementHandle, box: dict
206
- ):
207
- if not element:
208
- return False
209
-
210
- visibility = element.get_attribute("browsergym_visibility_ratio")
211
- if visibility is not None:
212
- return float(visibility) >= 0.5
213
-
214
- """Checks if a given element is the topmost element at its center position by default.
215
- If check_corners is True, it checks if any of the corners is visible."""
216
- if box:
217
- # corners
218
- points_to_check = [
219
- (box["x"], box["y"]),
220
- (box["x"] + box["width"], box["y"]),
221
- (box["x"], box["y"] + box["height"]),
222
- (box["x"] + box["width"], box["y"] + box["height"]),
223
- ]
224
-
225
- for x, y in points_to_check:
226
- # Execute JavaScript to find the topmost element at the point.
227
- top_element = page.evaluate(
228
- f"""() => {{
229
- const el = document.elementFromPoint({x}, {y});
230
- return el ? el.outerHTML : '';
231
- }}"""
232
- )
233
-
234
- # Check if the topmost element is the element we're interested in.
235
- if top_element and bid in top_element:
236
- return True
237
-
238
- return False
239
-
240
-
241
- def add_demo_mode_effects(
242
- page: playwright.sync_api.Page,
243
- elem: playwright.sync_api.ElementHandle,
244
- bid: str,
245
- demo_mode: Literal["off", "default", "all_blue", "only_visible_elements"],
246
- move_cursor: bool = True,
247
- highlight_box: bool = True,
248
- ):
249
- if demo_mode == "off":
250
- return
251
-
252
- """Adds visual effects to the target element"""
253
- box = elem.bounding_box()
254
- # box = extract_bounds_cdp(page, bid)
255
- if box:
256
- center_x, center_y = box["x"] + box["width"] / 2, box["y"] + box["height"] / 2
257
- is_top_element = check_for_overlay(page, bid, elem, box)
258
-
259
- if demo_mode == "only_visible_elements":
260
- if not is_top_element:
261
- return
262
- else:
263
- color = "blue"
264
-
265
- elif demo_mode == "default":
266
- if is_top_element:
267
- color = "blue"
268
- else:
269
- color = "red"
270
-
271
- elif demo_mode == "all_blue":
272
- color = "blue"
273
-
274
- if move_cursor:
275
- smooth_move_visual_cursor_to(page, center_x, center_y)
276
-
277
- if highlight_box:
278
- highlight_by_box(page, box, color=color)
279
-
280
-
281
- def call_fun(fun: callable, retry_with_force: bool):
282
- try:
283
- fun(force=False)
284
- except playwright.sync_api.TimeoutError as e:
285
- if retry_with_force:
286
- fun(force=True)
287
- else:
288
- raise e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/core/src/browsergym/core/chat.py DELETED
@@ -1,95 +0,0 @@
1
- import base64
2
- from pathlib import Path
3
- from typing import Literal
4
- import logging
5
- import playwright.sync_api
6
- import re
7
- import time
8
-
9
- from importlib import resources
10
-
11
- from . import _get_global_playwright, chat_files
12
-
13
-
14
- CHATBOX_DIR = resources.files(chat_files)
15
-
16
- logger = logging.getLogger(__name__)
17
-
18
-
19
- class Chat:
20
- def __init__(
21
- self, headless: bool, chat_size=(500, 800), record_video_dir=None, modern=True
22
- ) -> None:
23
- self.messages = []
24
-
25
- # create a new browser, browser context and page for the chat
26
- pw: playwright.sync_api.Playwright = _get_global_playwright()
27
- self.browser = pw.chromium.launch(
28
- headless=headless, args=[f"--window-size={chat_size[0]},{chat_size[1]}"]
29
- )
30
- self.context = self.browser.new_context(
31
- no_viewport=True,
32
- record_video_dir=Path(record_video_dir) / "chat_video" if record_video_dir else None,
33
- record_video_size=dict(width=chat_size[0], height=chat_size[1]),
34
- )
35
- self.page = self.context.new_page()
36
- self.recording_start_time = time.time() if record_video_dir else None
37
-
38
- # setup the chat page
39
- self.page.expose_function(
40
- "send_user_message", lambda msg: self._js_user_message_received_callback(msg=msg)
41
- )
42
-
43
- if modern:
44
- self.page.set_content(get_chatbox_modern(CHATBOX_DIR))
45
- else:
46
- self.page.set_content(get_chatbox_classic(CHATBOX_DIR))
47
-
48
- def _js_user_message_received_callback(self, msg: str):
49
- """Callback function for when a user message is received in the chatbox"""
50
- utc_time = time.time()
51
- self.messages.append({"role": "user", "timestamp": utc_time, "message": msg})
52
- # returning a list as JS doesnt like tuples
53
- return ["user", time.strftime("%H:%M", time.localtime(utc_time)), msg]
54
-
55
- def add_message(
56
- self, role: Literal["user", "user_image", "assistant", "info", "infeasible"], msg: str
57
- ):
58
- """Add a message to the chatbox and update the page accordingly."""
59
- utc_time = time.time()
60
- if role not in ("user", "user_image", "assistant", "info", "infeasible"):
61
- raise ValueError(f"Invalid role: {role}")
62
- if role in ("user", "user_image", "assistant", "infeasible"):
63
- self.messages.append({"role": role, "timestamp": utc_time, "message": msg})
64
- timestamp = time.strftime("%H:%M:%S", time.localtime(utc_time))
65
- self.page.evaluate(f"addChatMessage({repr(role)}, {repr(timestamp)}, {repr(msg)});")
66
-
67
- def wait_for_user_message(self):
68
- logger.info("Waiting for message from user...")
69
- # reset flag
70
- self.page.evaluate("USER_MESSAGE_RECEIVED = false;")
71
- # wait for flag to be raised
72
- self.page.wait_for_function("USER_MESSAGE_RECEIVED", polling=100, timeout=0)
73
- logger.info("Message received.")
74
-
75
- def close(self):
76
- self.context.close()
77
- self.browser.close()
78
-
79
-
80
- def get_chatbox_modern(chatbox_dir) -> str:
81
- with open(chatbox_dir / "chatbox_modern.html", "r") as file:
82
- chatbox_html = file.read()
83
-
84
- return chatbox_html
85
-
86
-
87
- def get_chatbox_classic(chatbox_dir) -> str:
88
- with open(chatbox_dir / "chatbox.html", "r") as file:
89
- chatbox_html = file.read()
90
- with open(chatbox_dir / "assistant.png", "rb") as f:
91
- image_base64 = base64.b64encode(f.read()).decode("utf-8")
92
-
93
- assistant_image_url = f"data:image/png;base64,{image_base64}"
94
- chatbox_html = re.sub("<ASSISTANT_IMAGE_URL>", assistant_image_url, chatbox_html)
95
- return chatbox_html
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
BrowserGym/browsergym/core/src/browsergym/core/chat_files/chatbox.html DELETED
@@ -1,243 +0,0 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
-
4
- <head>
5
- <meta charset="UTF-8">
6
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
- <title>UI Assistant Chat</title>
8
- <style>
9
- .chat-container {
10
- display: flex;
11
- flex-flow: column;
12
- position: fixed;
13
- bottom: 0;
14
- right: 0;
15
- height: 100%;
16
- width: 100%;
17
- border: 1px solid black;
18
- background-color: white;
19
- padding: 0;
20
- overflow: hidden;
21
- box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
22
- font-family: 'Source Sans Pro', Arial, Helvetica, sans-serif;
23
- }
24
-
25
- .chat-header {
26
- background-color: #032D42;
27
- color: white;
28
- padding: 5px;
29
- padding-left: 15px;
30
- text-align: center;
31
- flex: 0 1 auto;
32
- }
33
-
34
- .chat-body {
35
- padding: 10px;
36
- overflow-y: auto;
37
- display: flex;
38
- flex-direction: column;
39
- flex: 1 1 auto;
40
- }
41
-
42
- .chat-debug {
43
- padding: 10px;
44
- max-height: 30%;
45
- overflow-y: auto;
46
- display: flex;
47
- flex-direction: column;
48
- flex: 0 0 auto;
49
- }
50
-
51
/* Row holding the message form, separated from the area above by a top border. */
.chat-input-area {
    display: flex;
    flex-flow: row;
    margin-top: 5px;
    padding: 10px;
    border-top: 1px solid #ddd;
    flex: 0 1 50px;
}
60
-
61
- .chat-input-area form {
62
- display: flex;
63
- width: 100%;
64
- height: 100%;
65
- }
66
-
67
- .input-box {
68
- padding: 5px;
69
- margin-right: 10px;
70
- border-radius: 5px;
71
- border: 1px solid #ccc;
72
- width: 100%;
73
- }
74
-
75
- .submit-button {
76
- padding: 5px 10px;
77
- border-radius: 5px;
78
- background-color: #4CAF50;
79
- color: white;
80
- border: none;
81
- align-self: center;
82
- }
83
-
84
- .message {
85
- display: flex;
86
- align-items: center;
87
- margin: 0px;
88
- padding: 0px;
89
- }
90
-
91
/* Text bubble of a chat message. */
.message p {
    padding: 10px;
    /* Added padding inside the bubble */
    border-radius: 15px;
    flex-grow: 1;
    /* A non-zero length needs a unit; the bare "10" was an invalid declaration. */
    margin-top: 10px;
    margin-bottom: 0;
}
99
-
100
- .chat-debug .message p {
101
- padding: 0;
102
- border-radius: 0;
103
- flex-grow: 1;
104
- margin-top: 0;
105
- margin-bottom: 0;
106
- }
107
-
108
- .user-message {
109
- background-color: #d1f4d1;
110
- }
111
-
112
- .assistant-message {
113
- background-color: #e0e0e0;
114
- }
115
-
116
- .info-message {
117
- background-color: #f0f0f0;
118
- color: #707070;
119
- font-size: 13px;
120
- }
121
-
122
- .assistant-image {
123
- margin: 0px;
124
- padding: 10px;
125
- width: 40px;
126
- }
127
- </style>
128
- </head>
129
-
130
- <body>
131
-
132
-
133
-
134
- <div class="chat-container">
135
- <div class="chat-header">
136
- <h2>BrowserGym</h2>
137
- </div>
138
- <div class="chat-body" id="chatBody"></div>
139
- <div class="chat-debug" id="chatDebug"></div>
140
- <div class="chat-input-area">
141
- <form id="chatForm">
142
- <textarea class="input-box" rows="2" id="inputBox"></textarea>
143
- <input type="submit" class="submit-button" value="Send">
144
- </form>
145
- </div>
146
- </div>
147
-
148
- <script>
149
-
150
- const assistant_image_data = "<ASSISTANT_IMAGE_URL>";
151
-
152
- var USER_MESSAGE_RECEIVED = false;
153
-
154
/**
 * Escape the characters & < > " ' so the text can be inserted into HTML.
 *
 * @param {*} unsafe - Value to escape. Non-string values are converted with
 *     String(); null and undefined become the empty string.
 * @returns {string} The escaped text.
 */
function escapeHtml(unsafe) {
    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#039;',
    };
    // Coerce first so a number or a missing value does not raise a TypeError.
    const text = unsafe == null ? '' : String(unsafe);
    // Single pass over the input, so already-produced entities are never re-escaped.
    return text.replace(/[&<>"']/g, (ch) => entities[ch]);
}
162
-
163
/**
 * Append a message to the chat UI.
 *
 * Accepts both call forms:
 *   addChatMessage(role, msg)
 *   addChatMessage(role, timestamp, msg)
 * The timestamp of the three-argument form is accepted for call compatibility
 * and is not rendered by this page.
 *
 * "user" and "assistant" messages go to the chat body (assistant messages are
 * prefixed with the assistant image). "info" messages go to the debug area,
 * where only the most recent one stays visible.
 *
 * NOTE(review): other roles (e.g. "user_image", "infeasible") are rejected
 * here - confirm whether the host is expected to send them to this page.
 *
 * @param {string} role - "user", "assistant" or "info".
 * @param {...string} rest - Either [msg] or [timestamp, msg].
 * @throws {TypeError} If the role is not one of the supported values.
 */
function addChatMessage(role, ...rest) {
    if (role !== 'user' && role !== 'assistant' && role !== 'info') {
        throw new TypeError(`Illegal role "${role}".`);
    }
    // With two or more extra arguments the first one is the timestamp.
    const content = rest.length > 1 ? rest[1] : rest[0];

    const chatBody = document.getElementById('chatBody');
    const chatDebug = document.getElementById('chatDebug');

    const msgContainer = document.createElement('div');
    msgContainer.className = 'message';

    const bubble = document.createElement('p');

    if (role === 'info') {
        bubble.className = 'info-message';
        // Info messages are inserted as raw HTML (unescaped), as before.
        // NOTE(review): only safe while info content comes from a trusted source.
        bubble.innerHTML = content;
        msgContainer.appendChild(bubble);
        // hide previous debug messages
        for (const previous of chatDebug.children) {
            previous.style.display = 'none';
        }
        chatDebug.appendChild(msgContainer);
    } else {
        // textContent inserts the message as plain text, so no markup is interpreted.
        bubble.textContent = content;
        if (role === 'assistant') {
            bubble.className = 'assistant-message';
            const avatar = document.createElement('img');
            avatar.src = assistant_image_data;
            avatar.alt = 'Assistant';
            avatar.className = 'assistant-image';
            msgContainer.appendChild(avatar);
        } else {
            bubble.className = 'user-message';
        }
        msgContainer.appendChild(bubble);
        chatBody.appendChild(msgContainer);
    }

    // Keep the newest message in view.
    chatBody.scrollTop = chatBody.scrollHeight;

    if (role === 'user') {
        // Raised for the host, which waits on this flag.
        USER_MESSAGE_RECEIVED = true;
    }
}
210
-
211
// Fallback: define a no-op `send_user_message` only when no function of that
// name already exists, so that send_msg() can always call it.
if (typeof send_user_message !== 'function') {
    function send_user_message(msg) {
        // This will be overloaded by playwright
    }
}
216
-
217
- const inputBox = document.getElementById('inputBox');
218
-
219
/**
 * Send a message typed by the user.
 *
 * Messages that are empty or whitespace-only are ignored. Otherwise the text is
 * passed to `send_user_message`, shown in the chat as a "user" message, and the
 * input box is cleared.
 *
 * @param {string} msg - Raw text taken from the input box.
 */
function send_msg(msg) {
    const isBlank = msg.trim() === '';
    if (isBlank) {
        return;
    }
    send_user_message(msg);
    addChatMessage('user', msg);
    inputBox.value = '';
}
226
-
227
// Enter sends the current text; Shift+Enter is left alone.
inputBox.onkeypress = (event) => {
    const isPlainEnter = event.key === 'Enter' && !event.shiftKey;
    if (!isPlainEnter) {
        return;
    }
    event.preventDefault();
    send_msg(inputBox.value);
};

// Submitting the form (Send button) sends the text without a page reload.
document.getElementById('chatForm').onsubmit = function (submitEvent) {
    submitEvent.preventDefault();
    send_msg(inputBox.value);
    return false;
};
239
- </script>
240
-
241
- </body>
242
-
243
- </html>