Spaces:

iruno
/

test_wprm3

Sleeping

App Files Files Community

iruno committed on May 20

Commit

498ffec

verified ·

1 Parent(s): 5a0e61c

Upload 245 files

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

BrowserGym/.gitignore +154 -0
BrowserGym/.pre-commit-config.yaml +44 -0
BrowserGym/.readthedocs.yaml +32 -0
BrowserGym/LICENSE +13 -0
BrowserGym/Makefile +17 -0
BrowserGym/README.md +254 -0
BrowserGym/browsergym/assistantbench/README.md +21 -0
BrowserGym/browsergym/assistantbench/pyproject.toml +35 -0
BrowserGym/browsergym/assistantbench/requirements.txt +4 -0
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/__init__.py +54 -0
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_dicts.py +68 -0
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_factory.py +28 -0
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_numbers.py +34 -0
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_strings.py +174 -0
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/utils.py +25 -0
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluator.py +132 -0
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/task.py +142 -0
BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/utils.py +73 -0
BrowserGym/browsergym/browsergym.egg-info/PKG-INFO +22 -0
BrowserGym/browsergym/browsergym.egg-info/SOURCES.txt +6 -0
BrowserGym/browsergym/browsergym.egg-info/dependency_links.txt +1 -0
BrowserGym/browsergym/browsergym.egg-info/requires.txt +8 -0
BrowserGym/browsergym/browsergym.egg-info/top_level.txt +1 -0
BrowserGym/browsergym/core/README.md +10 -0
BrowserGym/browsergym/core/pyproject.toml +42 -0
BrowserGym/browsergym/core/requirements.txt +8 -0
BrowserGym/browsergym/core/src/browsergym/core/__init__.py +27 -0
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/__init__.cpython-311.pyc +0 -0
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/chat.cpython-311.pyc +0 -0
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/constants.cpython-311.pyc +0 -0
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/env.cpython-311.pyc +0 -0
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/observation.cpython-311.pyc +0 -0
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/registration.cpython-311.pyc +0 -0
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/spaces.cpython-311.pyc +0 -0
BrowserGym/browsergym/core/src/browsergym/core/__pycache__/task.cpython-311.pyc +0 -0
BrowserGym/browsergym/core/src/browsergym/core/action/__init__.py +11 -0
BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/__init__.cpython-311.pyc +0 -0
BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/base.cpython-311.pyc +0 -0
BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/functions.cpython-311.pyc +0 -0
BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/highlevel.cpython-311.pyc +0 -0
BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/parsers.cpython-311.pyc +0 -0
BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/utils.cpython-311.pyc +0 -0
BrowserGym/browsergym/core/src/browsergym/core/action/base.py +63 -0
BrowserGym/browsergym/core/src/browsergym/core/action/functions.py +624 -0
BrowserGym/browsergym/core/src/browsergym/core/action/highlevel.py +522 -0
BrowserGym/browsergym/core/src/browsergym/core/action/parsers.py +92 -0
BrowserGym/browsergym/core/src/browsergym/core/action/python.py +112 -0
BrowserGym/browsergym/core/src/browsergym/core/action/utils.py +288 -0
BrowserGym/browsergym/core/src/browsergym/core/chat.py +95 -0
BrowserGym/browsergym/core/src/browsergym/core/chat_files/chatbox.html +243 -0

BrowserGym/.gitignore ADDED Viewed

	@@ -0,0 +1,154 @@

+.DS_store
+.idea/
+docs/src/generated/
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+.python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# error logs
+error_logs.txt
+# tests
+tests/results
+tmp.py
+.vscode/**
+# demo and results
+results/
+.vscode/launch.json
+# assistantbench
+tests/assistantbench/assistantbench-predictions-test.jsonl
+# weblinx
+bg_wl_data/
+uv.lock

BrowserGym/.pre-commit-config.yaml ADDED Viewed

	@@ -0,0 +1,44 @@

+fail_fast: false
+default_language_version:
+  python: python3
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.2.0
+    hooks:
+      - id: trailing-whitespace
+        exclude: ^(.*)\.md$
+      - id: end-of-file-fixer
+      - id: check-yaml
+        exclude: ^(.circleci/recipe|recipe)  # conda build recipes are templated
+      - id: check-added-large-files
+  - repo: https://github.com/pocc/pre-commit-hooks
+    rev: v1.1.1
+    hooks:
+      - id: clang-format
+        args: [--style=file, -i]
+      - id: clang-tidy
+        args: [--fix, --fix-errors]
+  - repo: https://github.com/psf/black
+    rev: 24.2.0
+    hooks:
+      - id: black
+        args: [--config=./pyproject.toml]
+  - repo: https://github.com/asottile/blacken-docs
+    rev: v1.12.1
+    hooks:
+    - id: blacken-docs
+      args: [ '--line-length', '100' ]
+      additional_dependencies: [black]
+  - repo: https://github.com/Lucas-C/pre-commit-hooks
+    rev: v1.5.5
+    hooks:
+    - id: forbid-crlf
+    - id: remove-crlf
+    # Black does not clear tabs in docstrings
+    - id: forbid-tabs
+      files: '.*\.py$'
+    - id: remove-tabs
+      files: '.*\.py$'
+      args: [ '--whitespaces-count', '4' ]

BrowserGym/.readthedocs.yaml ADDED Viewed

	@@ -0,0 +1,32 @@

+# .readthedocs.yaml
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+# Required
+version: 2
+# Set the OS, Python version and other tools you might need
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.12"
+    # You can also specify other tool versions:
+    # nodejs: "19"
+    # rust: "1.64"
+    # golang: "1.19"
+# Build documentation in the "docs/" directory with Sphinx
+sphinx:
+  configuration: docs/src/conf.py
+# Optionally build your docs in additional formats such as PDF and ePub
+# formats:
+#    - pdf
+#    - epub
+# Optional but recommended, declare the Python requirements required
+# to build your documentation
+# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
+python:
+   install:
+   - requirements: docs/requirements.txt

BrowserGym/LICENSE ADDED Viewed

	@@ -0,0 +1,13 @@

+   Copyright 2024 ServiceNow
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+     http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

BrowserGym/Makefile ADDED Viewed

	@@ -0,0 +1,17 @@

+install:
+	@echo "--- 🚀 Installing project dependencies ---"
+	pip install -e ./browsergym/core -e ./browsergym/miniwob -e ./browsergym/webarena -e ./browsergym/visualwebarena/ -e ./browsergym/experiments -e ./browsergym/assistantbench -e ./browsergym/
+	playwright install chromium
+install-demo:
+	@echo "--- 🚀 Installing demo dependencies ---"
+	pip install -r demo_agent/requirements.txt
+	playwright install chromium
+demo:
+	@echo "--- 🚀 Running demo agent ---"
+	(set -x && cd demo_agent && python run_demo.py)
+test-core:
+	@echo "--- 🧪 Running tests ---"
+	pytest -n auto ./tests/core

BrowserGym/README.md ADDED Viewed

	@@ -0,0 +1,254 @@

+<div align="center">
+![BrowserGym banner](https://github.com/user-attachments/assets/4853f210-43ac-4107-a0d2-95c9c614dbe7)
+🛠️ [Setup](#%EF%B8%8F-setup) -
+🏋 [Usage](#-usage) -
+💻 [Demo](#-demo) -
+🌐 [Ecosystem](#-ecosystem) -
+🚀 [AgentLab](https://github.com/ServiceNow/AgentLab) -
+🌟 [Contributors](#-contributors) -
+📄 [Paper](https://arxiv.org/abs/2412.05467) -
+📝 [Citation](#-citing-this-work)
+[![pypi](https://badge.fury.io/py/browsergym.svg)](https://pypi.org/project/browsergym/)
+[![PyPI - License](https://img.shields.io/pypi/l/browsergym?style=flat-square)](http://www.apache.org/licenses/LICENSE-2.0)
+[![PyPI - Downloads](https://img.shields.io/pypi/dm/browsergym-core?style=flat-square)](https://pypistats.org/packages/browsergym-core)
+[![GitHub star chart](https://img.shields.io/github/stars/ServiceNow/BrowserGym?style=flat-square)](https://star-history.com/#ServiceNow/BrowserGym)
+[![Code Format](https://github.com/ServiceNow/BrowserGym/actions/workflows/code_format.yml/badge.svg)](https://github.com/ServiceNow/BrowserGym/actions/workflows/code_format.yml)
+[![Tests](https://github.com/ServiceNow/BrowserGym/actions/workflows/unit_tests.yml/badge.svg)](https://github.com/ServiceNow/BrowserGym/actions/workflows/unit_tests.yml)
+```sh
+pip install browsergym
+```
+</div>
+> [!WARNING]
+> BrowserGym is meant to provide an open, easy-to-use and extensible framework to accelerate the field of web agent research.
+> It is not meant to be a consumer product. Use with caution!
+> [!TIP]
+> 🚀 Check out [AgentLab](https://github.com/ServiceNow/AgentLab)✨ !
+> A seamless framework to implement, test, and evaluate your web agents on all BrowserGym benchmarks.
+https://github.com/ServiceNow/BrowserGym/assets/26232819/e0bfc788-cc8e-44f1-b8c3-0d1114108b85
+_Example of a GPT4-V agent executing open-ended tasks (top row, chat interactive), as well as WebArena and WorkArena tasks (bottom row)._
+BrowserGym includes the following benchmarks by default:
+ - [MiniWoB](https://miniwob.farama.org/)
+ - [WebArena](https://webarena.dev/)
+ - [VisualWebArena](https://jykoh.com/vwa)
+ - [WorkArena](https://github.com/ServiceNow/WorkArena)
+ - [AssistantBench](https://github.com/oriyor/assistantbench)
+ - [WebLINX](https://github.com/McGill-NLP/weblinx) (static benchmark)
+Designing new web benchmarks with BrowserGym is easy: simply inherit from the [`AbstractBrowserTask`](https://github.com/ServiceNow/BrowserGym/blob/main/browsergym/core/src/browsergym/core/task.py#L7C7-L7C26) class.
+## 🛠️ Setup
+To use browsergym, install one of the following packages:
+```sh
+pip install browsergym  # (recommended) everything below
+pip install browsergym-experiments  # experiment utilities (agent, loop, benchmarks) + everything below
+pip install browsergym-core  # core functionalities only (no benchmark, just the openended task)
+pip install browsergym-miniwob  # core + miniwob
+pip install browsergym-webarena  # core + webarena
+pip install browsergym-visualwebarena  # core + visualwebarena
+pip install browsergym-workarena  # core + workarena
+pip install browsergym-assistantbench  # core + assistantbench
+pip install weblinx-browsergym  # core + weblinx
+```
+Then set up Playwright by running
+```sh
+playwright install chromium
+```
+Finally, each benchmark comes with its own specific setup, which requires additional steps.
+ - for MiniWoB++, see [miniwob/README.md](browsergym/miniwob/README.md)
+ - for WebArena, see [webarena/README.md](browsergym/webarena/README.md)
+ - for VisualWebArena, see [visualwebarena/README.md](browsergym/visualwebarena/README.md)
+ - for WorkArena, see [WorkArena](https://github.com/ServiceNow/WorkArena)
+ - for AssistantBench, see [assistantbench/README.md](browsergym/assistantbench/README.md)
+### 🏗️ Development setup
+To install browsergym locally for development, use the following commands:
+```sh
+git clone git@github.com:ServiceNow/BrowserGym.git
+cd BrowserGym
+make install
+```
+Contributions are welcome! 😊
+## 🏋 Usage
+Boilerplate code to run an agent on an interactive, open-ended task:
+```python
+import gymnasium as gym
+import browsergym.core  # register the openended task as a gym environment
+# start an openended environment
+env = gym.make(
+    "browsergym/openended",
+    task_kwargs={"start_url": "https://www.google.com/"},  # starting URL
+    wait_for_user_message=True,  # wait for a user message after each agent message sent to the chat
+)
+# run the environment <> agent loop until termination
+obs, info = env.reset()
+while True:
+    action = ...  # implement your agent here
+    obs, reward, terminated, truncated, info = env.step(action)
+    if terminated or truncated:
+        break
+# release the environment
+env.close()
+```
+MiniWoB
+```python
+import gymnasium as gym
+import browsergym.miniwob  # register miniwob tasks as gym environments
+# start a miniwob task
+env = gym.make("browsergym/miniwob.choose-list")
+...
+# list all the available miniwob tasks
+env_ids = [id for id in gym.envs.registry.keys() if id.startswith("browsergym/miniwob")]
+print("\n".join(env_ids))
+```
+WorkArena
+```python
+import gymnasium as gym
+import browsergym.workarena  # register workarena tasks as gym environments
+# start a workarena task
+env = gym.make("browsergym/workarena.servicenow.order-ipad-pro")
+...
+# list all the available workarena tasks
+env_ids = [id for id in gym.envs.registry.keys() if id.startswith("browsergym/workarena")]
+print("\n".join(env_ids))
+```
+WebArena
+```python
+import gymnasium as gym
+import browsergym.webarena  # register webarena tasks as gym environments
+# start a webarena task
+env = gym.make("browsergym/webarena.310")
+...
+# list all the available webarena tasks
+env_ids = [id for id in gym.envs.registry.keys() if id.startswith("browsergym/webarena")]
+print("\n".join(env_ids))
+```
+VisualWebArena
+```python
+import gymnasium as gym
+import browsergym.visualwebarena  # register visualwebarena tasks as gym environments
+# start a visualwebarena task
+env = gym.make("browsergym/visualwebarena.721")
+...
+# list all the available visualwebarena tasks
+env_ids = [id for id in gym.envs.registry.keys() if id.startswith("browsergym/visualwebarena")]
+print("\n".join(env_ids))
+```
+AssistantBench
+```python
+import gymnasium as gym
+import browsergym.assistantbench  # register assistantbench tasks as gym environments
+# start an assistantbench task
+env = gym.make("browsergym/assistantbench.validation.3")
+...
+# list all the available assistantbench tasks
+env_ids = [id for id in gym.envs.registry.keys() if id.startswith("browsergym/assistantbench")]
+print("\n".join(env_ids))
+```
+## 💻 Demo
+If you want to experiment with a demo agent in BrowserGym, follow these steps
+```sh
+# conda setup
+conda env create -f demo_agent/environment.yml
+conda activate demo_agent
+# or pip setup
+pip install -r demo_agent/requirements.txt
+# then download the browser for playwright
+playwright install chromium
+```
+Our demo agent uses `openai` as a backend, be sure to set your `OPENAI_API_KEY`.
+Launch the demo agent as follows
+```sh
+# openended (interactive chat mode)
+python demo_agent/run_demo.py --task_name openended --start_url https://www.google.com
+# miniwob
+python demo_agent/run_demo.py --task_name miniwob.click-test
+# workarena
+python demo_agent/run_demo.py --task_name workarena.servicenow.order-standard-laptop
+# webarena
+python demo_agent/run_demo.py --task_name webarena.4
+# visualwebarena
+python demo_agent/run_demo.py --task_name visualwebarena.398
+```
+You can customize your experience by changing the `model_name` to your preferred LLM (it uses `gpt-4o-mini` by default), adding screenshots for your VLMs with `use_screenshot`, and much more!
+```sh
+python demo_agent/run_demo.py --help
+```
+## 🌐 Ecosystem
+- [AgentLab](https://github.com/ServiceNow/AgentLab): Seamlessly run agents on benchmarks, collect and analyse traces.
+- [WorkArena(++)](https://github.com/ServiceNow/WorkArena): A benchmark for web agents on the ServiceNow platform.
+- [WebArena](https://github.com/web-arena-x/webarena): A benchmark of realistic web tasks on self-hosted domains.
+- [VisualWebArena](https://github.com/web-arena-x/visualwebarena): A benchmark of realistic visual web tasks on self-hosted domains.
+- [MiniWoB(++)](https://miniwob.farama.org/): A collection of over 100 web tasks on synthetic web pages.
+- [WebLINX](https://github.com/McGill-NLP/weblinx): A dataset of real-world web interaction traces.
+- [AssistantBench](https://github.com/oriyor/assistantbench): A benchmark of realistic and time-consuming tasks on the open web.
+- [DoomArena](https://github.com/ServiceNow/DoomArena): A framework for AI agent security testing which supports injecting attacks into web pages from Browsergym environments.
+## 🌟 Contributors
+[![BrowserGym contributors](https://contrib.rocks/image?repo=ServiceNow/BrowserGym&max=2000)](https://github.com/ServiceNow/BrowserGym/graphs/contributors)
+## 📝 Citing This Work
+Please use the following BibTeX to cite our work:
+```tex
+@inproceedings{workarena2024,
+    title = {{W}ork{A}rena: How Capable are Web Agents at Solving Common Knowledge Work Tasks?},
+    author = {Drouin, Alexandre and Gasse, Maxime and Caccia, Massimo and Laradji, Issam H. and Del Verme, Manuel and Marty, Tom and Vazquez, David and Chapados, Nicolas and Lacoste, Alexandre},
+    booktitle = {Proceedings of the 41st International Conference on Machine Learning},
+    pages = {11642--11662},
+    year = {2024},
+    editor = {Salakhutdinov, Ruslan and Kolter, Zico and Heller, Katherine and Weller, Adrian and Oliver, Nuria and Scarlett, Jonathan and Berkenkamp, Felix},
+    volume = {235},
+    series = {Proceedings of Machine Learning Research},
+    month = {21--27 Jul},
+    publisher = {PMLR},
+    url = {https://proceedings.mlr.press/v235/drouin24a.html},
+}
+```

BrowserGym/browsergym/assistantbench/README.md ADDED Viewed

	@@ -0,0 +1,21 @@

+# AssistantBench <> BrowserGym
+This package provides an implementation for using the [AssistantBench](https://assistantbench.github.io/) benchmark in BrowserGym.
+Because AssistantBench includes open-ended tasks, setup is extremely easy and simply requires installing the package.
+Please note that AssistantBench has a hidden test set, so test set predictions will need to be uploaded to the official [leaderboard](https://huggingface.co/spaces/AssistantBench/leaderboard).
+## Setting up
+- Install the package (this is still a work in progress)
+```
+pip install browsergym-assistantbench
+```
+- Run inference, e.g., run the following commands for demo on a simple toy task
+```
+python demo_agent/run_demo.py --task_name assistantbench.validation.3
+```
+- Test set predictions will be saved to `./assistantbench-predictions-test.jsonl`. To evaluate on the official test set, upload these predictions to the official [leaderboard](https://huggingface.co/spaces/AssistantBench/leaderboard).

BrowserGym/browsergym/assistantbench/pyproject.toml ADDED Viewed

	@@ -0,0 +1,35 @@

+[build-system]
+requires = ["hatchling", "hatch-requirements-txt"]
+build-backend = "hatchling.build"
+[project]
+name = "browsergym-assistantbench"
+description = "AssistantBench benchmark for BrowserGym"
+authors = [
+    {name = "Ori Yoran"},
+    {name = "Maxime Gasse"},
+]
+readme = "README.md"
+requires-python = ">3.7"
+license = {text = "Apache-2.0"}
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Programming Language :: Python :: 3",
+    "Operating System :: OS Independent",
+    "Intended Audience :: Science/Research",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "License :: OSI Approved :: Apache Software License",
+]
+dynamic = ["dependencies", "version"]
+[project.urls]
+homepage = "https://github.com/ServiceNow/BrowserGym"
+[tool.hatch.version]
+path = "../core/src/browsergym/core/__init__.py"
+[tool.hatch.metadata.hooks.requirements_txt]
+files = ["requirements.txt"]
+[tool.hatch.build.targets.wheel]
+packages = ["src/browsergym"]

BrowserGym/browsergym/assistantbench/requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+browsergym-core==0.13.4
+datasets
+scipy
+numpy

BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/__init__.py ADDED Viewed

	@@ -0,0 +1,54 @@

+from browsergym.core.registration import register_task
+from . import task
# Gym ids registered below, grouped by AssistantBench split (in registration order).
TOY_AB_TASK_IDS = []
VALID_AB_TASK_IDS = []
TEST_AB_TASK_IDS = []
# register a toy easy task for testing implementation
gym_id = "assistantbench.imp.0"
register_task(
    gym_id,
    task.AssistantBenchTask,
    task_kwargs={
        "task_id": "imp.0",
    },
    default_task_kwargs={
        "save_predictions": False,  # can be overridden
    },
)
TOY_AB_TASK_IDS.append(gym_id)
# register the AssistantBench dev set (validation.0 .. validation.32)
for task_id in range(33):
    gym_id = f"assistantbench.validation.{task_id}"
    register_task(
        gym_id,
        task.AssistantBenchTask,
        task_kwargs={
            "task_id": f"validation.{task_id}",
        },
        default_task_kwargs={
            "save_predictions": False,  # can be overridden
        },
    )
    VALID_AB_TASK_IDS.append(gym_id)
# register the AssistantBench test set (test.0 .. test.180); unlike the other
# splits, predictions are saved by default here
for task_id in range(181):
    gym_id = f"assistantbench.test.{task_id}"
    register_task(
        gym_id,
        task.AssistantBenchTask,
        task_kwargs={
            "task_id": f"test.{task_id}",
        },
        default_task_kwargs={
            "save_predictions": True,  # can be overridden
        },
    )
    TEST_AB_TASK_IDS.append(gym_id)
# Every registered id: toy task first, then validation, then test.
ALL_AB_TASK_IDS = TOY_AB_TASK_IDS + VALID_AB_TASK_IDS + TEST_AB_TASK_IDS

BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_dicts.py ADDED Viewed

	@@ -0,0 +1,68 @@

+from typing import Dict, List
+import numpy as np
+from .utils import _align_bags
def calculate_f1_score(precision, recall):
    """Return the harmonic mean of ``precision`` and ``recall``.

    Returns 0 when the two values sum to zero, so the division below is
    never attempted with a zero denominator.
    """
    denominator = precision + recall
    if denominator == 0:
        return 0
    numerator = 2 * (precision * recall)
    return numerator / denominator
def calc_recall(pred: Dict, gold: Dict, use_gold_for_eval: bool):
    """Return the mean per-key score of ``pred`` measured over the keys of ``gold``.

    Each key of ``gold`` contributes one score:

    * 0 if the key is missing from ``pred``;
    * 0 if, after ``fix_number`` normalisation, the two values have different types;
    * otherwise the result of the evaluator selected for the value's type.

    Args:
        pred: dict whose values are being scored.
        gold: dict whose keys drive the comparison.
        use_gold_for_eval: select the evaluator from the gold value's type
            (True) or from the pred value's type (False).

    Returns:
        The average of the per-key scores. An empty ``gold`` gives 1.0: there
        is nothing to miss, and ``np.average`` of an empty list would be nan.
    """
    # Imported lazily: evaluate_factory imports this module at load time.
    from .evaluate_factory import get_evaluator_from_gold_answer

    if not gold:
        return 1.0
    recall = []
    for gold_key, gold_value in gold.items():
        if gold_key not in pred:
            recall.append(0)
            continue
        gold_value = fix_number(gold_value)
        pred_value = fix_number(pred[gold_key])
        # A type mismatch always scores 0, so decide that before looking up an
        # evaluator (the lookup raises KeyError for types without an evaluator).
        if type(pred_value) is not type(gold_value):
            recall.append(0)
            continue
        evaluator = get_evaluator_from_gold_answer(
            type(gold_value) if use_gold_for_eval else type(pred_value)
        )
        recall.append(evaluator(pred_value, gold_value))
    return np.average(recall)
def fix_number(number):
    """Convert number-like answers to ``float`` and leave everything else alone.

    * ``str``: "$", "%" and "sqft" are replaced by spaces, the result is
      stripped, "," is turned into "." and the text is parsed with ``float``.
      If parsing fails, the original string is returned unchanged.
    * ``int`` (but not ``bool``): converted to ``float``.
    * anything else (``float``, ``bool``, ``None``, containers, ...): returned as is.
    """
    if isinstance(number, str):
        cleaned = (
            number.replace("$", " ")
            .replace("%", " ")
            .replace("sqft", " ")
            .strip()
            .replace(",", ".")
        )
        try:
            return float(cleaned)
        except ValueError:
            # Not a number after clean-up: keep the caller's original text.
            return number
    # bool is a subclass of int; it must stay a bool so it is scored as a non-number.
    if isinstance(number, int) and not isinstance(number, bool):
        return float(number)
    return number
def evaluate_pair_of_dicts(pred: Dict, gold: Dict):
    """Return the F1 score between one predicted dict and one gold dict."""
    # Recall scores gold's keys against pred (evaluator chosen from the gold value).
    gold_coverage = calc_recall(pred, gold, True)
    # Precision is the same routine with the roles swapped.
    pred_coverage = calc_recall(gold, pred, False)
    return calculate_f1_score(pred_coverage, gold_coverage)
def evaluate_dicts(pred: List[Dict], gold: List[Dict]):
    """Score a list of predicted dicts against a list of gold dicts.

    The dicts are aligned with ``_align_bags`` using ``evaluate_pair_of_dicts``
    as the pairwise metric, and the aligned scores are averaged.

    Args:
        pred: predicted dicts; a single dict is accepted and treated as a
            one-element list.
        gold: gold dicts; a single dict is likewise wrapped in a list.

    Returns:
        0 when ``pred`` is not a list of dicts (for example ``None``, a number,
        a string, or a list holding non-dict items); otherwise the average
        alignment score.
    """
    # _align_bags sizes its score matrix with len() and walks its arguments
    # item by item, so a bare dict would be treated as a sequence of its keys.
    if isinstance(pred, dict):
        pred = [pred]
    if isinstance(gold, dict):
        gold = [gold]
    if not isinstance(pred, list) or not all(isinstance(item, dict) for item in pred):
        return 0
    max_alignment_scores = _align_bags(pred, gold, evaluate_pair_of_dicts)
    return np.average(max_alignment_scores)

BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_factory.py ADDED Viewed

	@@ -0,0 +1,28 @@

+from typing import Union
+from .evaluate_dicts import evaluate_dicts
+from .evaluate_numbers import evaluate_numbers
+from .evaluate_strings import evaluate_strings
# Scoring function for each answer-type name.
EvaluatorFactory = {
    "string": evaluate_strings,
    "number": evaluate_numbers,
    "json": evaluate_dicts,
    "string list": evaluate_strings,
}
# Scoring function for each Python type of an answer value.
# Booleans and lists are scored with the string evaluator.
EvaluatorFactoryFromType = {
    str: evaluate_strings,
    int: evaluate_numbers,
    float: evaluate_numbers,
    bool: evaluate_strings,
    list: evaluate_strings,
}
def get_evaluator(evaluator: str):
    """Return the scoring function registered under the answer-type name ``evaluator``.

    Raises:
        KeyError: if ``evaluator`` is not a key of ``EvaluatorFactory``.
    """
    scorer = EvaluatorFactory[evaluator]
    return scorer
def get_evaluator_from_gold_answer(gold_answer: type):
    """Return the scoring function registered for an answer *type*.

    Args:
        gold_answer: the Python type of the answer value (e.g. ``str`` or
            ``float``), not the value itself; it is used directly as a key of
            ``EvaluatorFactoryFromType``.

    Raises:
        KeyError: if no evaluator is registered for that type.
    """
    return EvaluatorFactoryFromType[gold_answer]

BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_numbers.py ADDED Viewed

	@@ -0,0 +1,34 @@

+from typing import Union
+import numpy as np
+# Renamed calc_z function to distance_function_log
def distance_function_log(pred: float, gold: float):
    """Score how close ``pred`` is to ``gold`` on a logarithmic scale.

    The score is ``1 - ln(larger magnitude / smaller magnitude)``, floored at 0,
    so equal values score 1 and the score falls as their ratio grows.

    * Both values 0 -> 1.
    * A single 0 is replaced by 1e-4 so that the ratio is defined.
    * Values of opposite sign -> 0 (their ratio has no real logarithm).
    * Two negative values are compared by magnitude, exactly like two positive
      ones; comparing them by signed order would give scores above 1.
    """
    if pred == gold == 0:
        return 1
    if pred == 0:
        pred = 1e-4
    if gold == 0:
        gold = 1e-4
    if (pred < 0) != (gold < 0):
        return 0
    pred_magnitude, gold_magnitude = abs(pred), abs(gold)
    if pred_magnitude > gold_magnitude:
        ratio = pred_magnitude / gold_magnitude
    else:
        ratio = gold_magnitude / pred_magnitude
    return max(0, 1 - np.log(ratio))
def evaluate_numbers(pred: Union[float, str], gold: float):
    """Score a numeric prediction against a numeric gold answer.

    Values that are not already ``int``/``float`` are parsed with ``float``.
    If either value cannot be parsed (wrong text, ``None``, a container, ...)
    the score is 0; otherwise it is ``distance_function_log(pred, gold)``.
    """
    try:
        if not isinstance(pred, (int, float)):
            pred = float(pred)
        if not isinstance(gold, (int, float)):
            gold = float(gold)
    except (TypeError, ValueError):
        # float() raises ValueError for bad text and TypeError for None/containers.
        return 0
    return distance_function_log(pred, gold)

BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/evaluate_strings.py ADDED Viewed

	@@ -0,0 +1,174 @@

+"""
+Evaluation for two strings or list of strings.
+Code taken from the DROP benchmark - https://github.com/allenai/allennlp-reading-comprehension/blob/master/allennlp_rc/eval/drop_eval.py
+"""
+import re
+import string
+from typing import List, Set, Tuple, Union
+import numpy as np
+from scipy.optimize import linear_sum_assignment
+# From here through _normalize_answer was originally copied from:
+# https://worksheets.codalab.org/rest/bundles/0x6b567e1cf2e041ec80d7098f031c5c9e/contents/blob/
+# Then cleaned up and modified a bit.
+def _remove_articles(text: str) -> str:
+    regex = re.compile(r"\b(a|an|the)\b", re.UNICODE)
+    return re.sub(regex, " ", text)
+def _white_space_fix(text: str) -> str:
+    return " ".join(text.split())
# Characters dropped by _remove_punc: every ASCII punctuation character.
EXCLUDE = set(string.punctuation)
def _remove_punc(text: str) -> str:
    """Strip punctuation from ``text`` unless the whole text parses as a number.

    Numbers are returned untouched so that characters such as "." and "-"
    survive.
    """
    if _is_number(text):
        return text
    kept = [ch for ch in text if ch not in EXCLUDE]
    return "".join(kept)
+def _lower(text: str) -> str:
+    return text.lower()
+def _tokenize(text: str) -> List[str]:
+    return re.split(" |-", text)
def _normalize_answer(text: str) -> str:
    """Lower text and remove punctuation, articles and extra whitespace.

    The text is split into tokens, each token is cleaned on its own, tokens
    that end up empty are dropped, and the rest are joined with single spaces.
    """
    cleaned_tokens = []
    for token in _tokenize(text):
        cleaned = _lower(token)
        cleaned = _remove_punc(cleaned)
        cleaned = _normalize_number(cleaned)
        cleaned = _remove_articles(cleaned)
        cleaned = _white_space_fix(cleaned)
        if cleaned.strip():
            cleaned_tokens.append(cleaned)
    return " ".join(cleaned_tokens).strip()
+def _is_number(text: str) -> bool:
+    try:
+        float(text)
+        return True
+    except ValueError:
+        return False
def _normalize_number(text: str) -> str:
    """Rewrite numeric text in canonical ``str(float(...))`` form; return other text as is."""
    return str(float(text)) if _is_number(text) else text
def _answer_to_bags(
    answer: Union[str, List[str], Tuple[str, ...]]
) -> Tuple[List[str], List[Set[str]]]:
    """Normalise an answer and split it into token bags.

    A list or tuple is treated as a collection of spans; anything else is
    treated as a single span.

    Returns:
        ``(normalized_spans, token_bags)``: one normalised string per span, and
        for each of them the set of its whitespace-separated tokens.
    """
    raw_spans = answer if isinstance(answer, (list, tuple)) else [answer]
    normalized_spans: List[str] = [_normalize_answer(span) for span in raw_spans]
    token_bags = [set(span.split()) for span in normalized_spans]
    return normalized_spans, token_bags
def _align_bags(predicted: List[Set[str]], gold: List[Set[str]]) -> List[float]:
    """
    Find the best one-to-one pairing of gold and predicted token bags and
    return the F1 obtained by each pairing.

    A gold/predicted pair is only scored when ``_match_numbers_if_present``
    accepts it; every other pair keeps a score of 0. The result has
    ``max(len(gold), len(predicted))`` entries, so unmatched bags count as 0.
    """
    gold_count, predicted_count = len(gold), len(predicted)
    scores = np.zeros([gold_count, predicted_count])
    for gold_idx, gold_bag in enumerate(gold):
        for pred_idx, pred_bag in enumerate(predicted):
            if not _match_numbers_if_present(gold_bag, pred_bag):
                continue
            scores[gold_idx, pred_idx] = _compute_f1(pred_bag, gold_bag)
    # linear_sum_assignment minimises cost; negating the scores maximises total F1.
    rows, cols = linear_sum_assignment(-scores)
    best_scores = np.zeros([max(gold_count, predicted_count)])
    for row, col in zip(rows, cols):
        best_scores[row] = max(best_scores[row], scores[row, col])
    return best_scores
+def _compute_f1(predicted_bag: Set[str], gold_bag: Set[str]) -> float:
+    intersection = len(gold_bag.intersection(predicted_bag))
+    if not predicted_bag:
+        precision = 1.0
+    else:
+        precision = intersection / float(len(predicted_bag))
+    if not gold_bag:
+        recall = 1.0
+    else:
+        recall = intersection / float(len(gold_bag))
+    f1 = (
+        (2 * precision * recall) / (precision + recall)
+        if not (precision == 0.0 and recall == 0.0)
+        else 0.0
+    )
+    return f1
def _match_numbers_if_present(gold_bag: Set[str], predicted_bag: Set[str]) -> bool:
    """Tell whether a predicted bag may be scored against a gold bag.

    True when the gold bag contains no numeric tokens, or when at least one of
    its numeric tokens also appears in the predicted bag.
    """
    gold_numbers = {word for word in gold_bag if _is_number(word)}
    predicted_numbers = {word for word in predicted_bag if _is_number(word)}
    if not gold_numbers:
        return True
    return bool(gold_numbers.intersection(predicted_numbers))
def get_metrics(
    predicted: Union[str, List[str], Tuple[str, ...]],
    gold: Union[str, List[str], Tuple[str, ...]],
) -> Tuple[float, float]:
    """
    Return ``(exact_match, f1)`` for a predicted answer against a gold answer,
    each of which is either a string or a list/tuple of strings.

    ``exact_match`` is 1.0 when both answers normalise to the same set of spans
    and have the same number of spans, else 0.0. ``f1`` is the mean aligned
    bag F1, rounded to two decimals.
    """
    predicted_spans, predicted_token_bags = _answer_to_bags(predicted)
    gold_spans, gold_token_bags = _answer_to_bags(gold)
    same_spans = set(predicted_spans) == set(gold_spans) and len(predicted_spans) == len(
        gold_spans
    )
    exact_match = 1.0 if same_spans else 0.0
    mean_f1 = np.mean(_align_bags(predicted_token_bags, gold_token_bags))
    f1 = round(mean_f1, 2)
    return exact_match, f1
def evaluate_strings(prediction, gold):
    """
    Return the mean aligned F1 between ``prediction`` and ``gold``.

    Inputs that are neither a ``list`` nor a ``str`` are converted with ``str()``
    first. Any exception raised while building or aligning the token bags
    yields a score of 0.0.
    """
    if type(prediction) not in (list, str):
        prediction = str(prediction)
    if type(gold) not in (list, str):
        gold = str(gold)
    try:
        predicted_token_bags = _answer_to_bags(prediction)[1]
        gold_token_bags = _answer_to_bags(gold)[1]
        return np.mean(_align_bags(predicted_token_bags, gold_token_bags))
    except Exception:
        return 0.0

BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluate_utils/utils.py ADDED Viewed

	@@ -0,0 +1,25 @@

+from typing import Callable, List, Set
+import numpy as np
+from scipy.optimize import linear_sum_assignment
def _align_bags(
    predicted: List[Set[str]],
    gold: List[Set[str]],
    method: Callable[[object, object], float],
) -> List[float]:
    """
    Score the best one-to-one pairing between gold and predicted items.

    ``method(pred_item, gold_item)`` is evaluated for every pair and
    ``linear_sum_assignment`` selects the pairing with the highest total score.

    Returns:
        An array with ``max(len(gold), len(predicted))`` entries. Entry ``i`` holds
        the score of the prediction paired with gold item ``i``; all other entries
        stay at 0.
    """
    n_gold, n_pred = len(gold), len(predicted)
    score_matrix = np.zeros([n_gold, n_pred])
    for g_idx in range(n_gold):
        for p_idx in range(n_pred):
            score_matrix[g_idx, p_idx] = method(predicted[p_idx], gold[g_idx])
    # linear_sum_assignment minimises cost, so negate the scores to maximise them.
    gold_rows, pred_cols = linear_sum_assignment(-score_matrix)
    best_scores = np.zeros([max(n_gold, n_pred)])
    for g_idx, p_idx in zip(gold_rows, pred_cols):
        candidate = score_matrix[g_idx, p_idx]
        if candidate > best_scores[g_idx]:
            best_scores[g_idx] = candidate
    return best_scores

BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/evaluation/evaluator.py ADDED Viewed

	@@ -0,0 +1,132 @@

+# todo export evaluation to a python package
+import json
+import numpy as np
+from .evaluate_utils.evaluate_factory import get_evaluator
def find_isnan(samp):
    """
    Return True when ``samp`` is NaN according to ``np.isnan``, else False.

    Values that ``np.isnan`` cannot handle (for example strings), or whose result
    has no single truth value (for example multi-element lists), are reported as
    not-NaN instead of raising.
    """
    try:
        return bool(np.isnan(samp))
    except Exception:
        # Best-effort probe: anything np.isnan rejects is simply "not NaN".
        # `Exception` (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return False
def fix_ans(answer):
    """
    Rewrite single-quoted dict-like text so that it uses JSON double quotes.

    The quote patterns around braces, commas and colons are replaced in a fixed
    order. If ``answer`` does not support these replacements (for example it is
    not a string), it is returned unchanged.
    """
    # Order matters: the generic "': " rule must run after the quoted-value rules.
    replacements = (
        ("{'", '{"'),
        ("', '", '", "'),
        ("': '", '": "'),
        ("'}", '"}'),
        ("': ", '": '),
    )
    try:
        fixed = answer
        for old, new in replacements:
            fixed = fixed.replace(old, new)
        return fixed
    except Exception:
        # Best-effort: leave unsupported inputs untouched. `Exception` instead of
        # a bare except lets KeyboardInterrupt/SystemExit propagate.
        return answer
def parse_answer(answer):
    """
    Classify a gold answer (given as a list of lines) and parse it accordingly.

    Returns:
        ``(parsed, kind)`` where ``kind`` is one of:

        * ``"number"``: single entry accepted by ``fix_number``; ``parsed`` is its value.
        * ``"json"``: every entry parsed as JSON (after ``fix_ans``); ``parsed`` is a list.
        * ``"string"``: single entry that is neither a number nor JSON.
        * ``"string list"``: several entries, at least one of which is not JSON.
    """
    if len(answer) == 1:
        single = answer[0]
        ans, is_num = fix_number(single)
        if is_num:
            return ans, "number"
        try:
            return [json.loads(fix_ans(single))], "json"
        except Exception:
            # Not JSON either. fix_number already rejected this value above, so
            # there is no point in calling it a second time.
            return single, "string"
    try:
        return [json.loads(fix_ans(ex)) for ex in answer], "json"
    except Exception:
        return answer, "string list"
def fix_number(number):
    """
    Try to interpret ``number`` as a float.

    For strings, ``$``, ``%`` and ``sqft`` are replaced by spaces, the text is
    stripped, commas become dots and a trailing `` square kilometers`` unit is
    removed before calling ``float``.

    Returns:
        ``(value, is_number)``:

        * str that parses: ``(float, True)``
        * str that does not parse: ``(original str, False)``
        * exact ``int``: ``(float(number), True)``
        * anything else: ``(number, True)`` — returned as-is and flagged as a number.
    """
    if type(number) is str:
        cleaned = number
        for token in ("$", "%", "sqft"):
            cleaned = cleaned.replace(token, " ")
        cleaned = cleaned.strip().replace(",", ".").replace(" square kilometers", "")
        try:
            return float(cleaned), True
        except ValueError:
            # float() on a str can only fail with ValueError.
            return number, False
    if type(number) is int:
        return float(number), True
    return number, True
def fix_prediction(prediction, gold_answer, evaluator):
    """
    Normalize a prediction so that it can be compared with the gold answer.

    * A one-element list holding an ``int`` or a numeric string is collapsed to
      the number returned by ``fix_number``.
    * A non-list prediction is passed through ``fix_number``; when ``evaluator``
      is ``"json"`` each line is additionally parsed as JSON, falling back to a
      one-element list wrapping the prediction.

    Returns:
        ``(prediction, run_eval)``. ``run_eval`` is False when the prediction is
        empty, or when it is a list of several items while the gold answer is a
        float; in those cases no evaluator should be run.
    """
    is_single_numeric_list = (
        type(prediction) is list
        and len(prediction) == 1
        and (
            type(prediction[0]) is int
            or (type(prediction[0]) is str and prediction[0].isnumeric())
        )
    )
    if is_single_numeric_list:
        # fix_number returns (value, is_number); keep only the value. Assigning
        # the whole tuple would hand a (value, flag) pair to the evaluator.
        prediction, _ = fix_number(prediction[0])
    if type(prediction) is not list:
        prediction, _ = fix_number(prediction)
        if evaluator == "json":
            try:
                prediction = [json.loads(pred) for pred in prediction.split("\n")]
            except Exception:
                # Not line-delimited JSON (or not a string at all).
                prediction = [prediction]
    if hasattr(type(prediction), "__len__") and len(prediction) == 0:
        return prediction, False
    if type(prediction) is list and len(prediction) > 1 and type(gold_answer) is float:
        return prediction, False
    return prediction, True
def _is_missing_answer(value):
    """Return True for an empty sized value or a NaN; unsized values are never empty."""
    if hasattr(type(value), "__len__") and len(value) == 0:
        return True
    return find_isnan(value)


def question_scorer(prediction, gold_answer):
    """
    Score a prediction against a gold answer.

    The prediction is parsed as JSON when possible. The gold answer (a list, or
    a newline-separated string whose blank lines are ignored) is classified by
    ``parse_answer``, which also selects the evaluator.

    Returns:
        ``(accuracy, has_ans)``. ``has_ans`` is 0.0 when the prediction is empty
        or NaN (for lists: when every element is), else 1.0. ``accuracy`` is 0.0
        when ``fix_prediction`` reports that evaluation should not run.
    """
    try:
        prediction = json.loads(prediction)
    except Exception:
        # Not JSON (or not a string): score the raw prediction as-is.
        pass
    if type(gold_answer) is list:
        answer_list = gold_answer
    else:
        answer_list = [x for x in gold_answer.split("\n") if len(x.strip()) > 0]
    gold_answer, evaluator = parse_answer(answer_list)
    prediction, run_eval = fix_prediction(prediction, gold_answer, evaluator)
    has_ans = 1.0
    # _is_missing_answer only calls len() on sized values, so unsized predictions
    # such as None (from JSON `null`) or booleans no longer raise TypeError here.
    if _is_missing_answer(prediction):
        has_ans = 0.0
    if type(prediction) is list and all(_is_missing_answer(pred) for pred in prediction):
        has_ans = 0.0
    if not run_eval:
        return 0.0, has_ans
    metric_eval = get_evaluator(evaluator)
    accuracy = metric_eval(prediction, gold_answer)
    return accuracy, has_ans

BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/task.py ADDED Viewed

	@@ -0,0 +1,142 @@

+import logging
+import os
+from typing import Dict, Tuple
+from datasets import load_dataset
+from playwright.sync_api import Page
+from browsergym.core.task import AbstractBrowserTask
+from .evaluation.evaluator import question_scorer
+from .utils import add_prediction_to_jsonl
logger = logging.getLogger(__name__)

# Module-wide fallback output path, used by tasks created without an explicit one.
_DEFAULT_OUTPUT_FILE = None


def set_default_output_file(output_file: str):
    """Set the module-wide default output file path."""
    global _DEFAULT_OUTPUT_FILE
    _DEFAULT_OUTPUT_FILE = output_file


def get_default_output_file():
    """Return the module-wide default output file path (``None`` until set)."""
    return _DEFAULT_OUTPUT_FILE
# Load dataset (runs at import time).
# NOTE(review): trust_remote_code=True is passed to load_dataset; confirm that
# the dataset source is trusted.
DATA_DATASET = "AssistantBench/AssistantBench"
all_tasks = load_dataset(DATA_DATASET, trust_remote_code=True)


# Extract answers and tasks for validation and test splits
def extract_data(split_name: str) -> Tuple[Dict[str, str], Dict[str, str], Dict[str, str]]:
    """
    Build the per-task lookup tables for one dataset split.

    Keys have the form ``"<split_name>.<row index>"``.

    Returns:
        ``(answers, goals, task_ids)`` taken from the ``answer``, ``task`` and
        ``id`` fields of each row. A ``None`` answer is stored as ``""``.
    """
    answers, goals, task_ids = {}, {}, {}
    for index, row in enumerate(all_tasks[split_name]):
        key = f"{split_name}.{index}"
        answers[key] = "" if row["answer"] is None else row["answer"]
        goals[key] = row["task"]
        task_ids[key] = row["id"]
    return answers, goals, task_ids
+# Implementation data for testing
def get_implementation_testing_data() -> Tuple[Dict[str, str], Dict[str, str], Dict[str, str]]:
    """
    Return a single hard-coded task used to test the implementation.

    Returns:
        ``(answers, goals, task_ids)``, each with the single key ``"imp.0"``.
    """
    key = "imp.0"
    answers = {key: "20"}
    goals = {
        key: "What is the weather in Paris yesterday in Celsius? Answer with the number only."
    }
    task_ids = {key: "test_imp_id_0"}
    return answers, goals, task_ids
# Combine dev, test, and implementation-specific testing splits
gold_answers_dev, tasks_dev, ids_dev = extract_data("validation")
gold_answers_test, tasks_test, ids_test = extract_data("test")
(
    gold_answers_impl_testing,
    tasks_test_impl_testing,
    ids_imp_testing,
) = get_implementation_testing_data()
# Merge the per-split tables; on a key clash the right-most mapping wins.
gold_answers = gold_answers_dev | gold_answers_test | gold_answers_impl_testing
tasks = tasks_dev | tasks_test | tasks_test_impl_testing
ids = ids_dev | ids_test | ids_imp_testing
class AssistantBenchTask(AbstractBrowserTask):
    """
    Browser task that poses one AssistantBench question and scores the agent's
    final chat answer against the gold answer.
    """

    @classmethod
    def get_task_id(cls) -> str:
        """
        Generic class for several task ids, this way of obtaining the task id is not compatible for now.
        """
        raise NotImplementedError

    def __init__(
        self, seed: int, task_id: str, output_file: str = None, save_predictions: bool = False
    ) -> None:
        """
        Args:
            seed (int): Random seed for task initialization.
            task_id (str): Unique identifier for the task (for the BrowserGym environment).
            output_file (str, optional): Path to the output file for saving results, needed for test set.
            save_predictions (bool, optional): Save predictions to the output file (yes/no).

        Raises:
            KeyError: if ``task_id`` is not a known task.
        """
        super().__init__(seed)
        self.locale = "en-US"
        self.timezone_id = "America/New_York"
        self.task_id = task_id
        self.start_url = "https://google.com"
        # All three lookup tables share the same string keys, so normalise once.
        task_key = str(self.task_id)
        self.goal = tasks[task_key]
        self.gold = gold_answers[task_key]
        self.ab_task_id = ids[task_key]
        self.save_predictions = save_predictions
        self.output_file = output_file
        # set output_file using the global default value, if not provided in constructor
        if not self.output_file:
            self.output_file = get_default_output_file()
        # use env variable in last resort
        if not self.output_file:
            self.output_file = os.getenv("ASSISTANTBENCH_OUTPUT_FILE", None)
        if self.save_predictions and self.output_file:
            logger.info(f"Task prediction will be written to output file {self.output_file}")

    def setup(self, page: Page) -> Tuple[str, dict]:
        """
        Navigate to the start URL and, when predictions are saved, reserve an
        empty entry for this task in the output file.

        Returns:
            ``(goal, info)`` where ``info`` is an empty dict.
        """
        logger.info(f"Navigating to start url: {self.start_url}")
        page.goto(self.start_url, timeout=50000)
        if self.save_predictions and self.output_file:
            # create an empty task entry in the output file (will raise an Exception if the entry is already there)
            add_prediction_to_jsonl(
                file_path=self.output_file,
                task_id=self.ab_task_id,
                prediction="",
                override_if_exists=False,
            )
        return self.goal, {}

    def teardown(self) -> None:
        """Nothing to clean up."""
        pass

    def validate(self, page: Page, chat_messages: list[dict]) -> Tuple[float, bool, str, dict]:
        """
        Score the task once the last chat message comes from the assistant.

        Returns:
            ``(accuracy, done, msg, info)``. Until the assistant has answered this
            is ``(0.0, False, "", {})``. Afterwards ``done`` is True and
            ``accuracy`` is the score returned by ``question_scorer``.
        """
        accuracy, done, msg, info = 0.0, False, "", {}
        # eval when the agent returns a response
        if chat_messages and chat_messages[-1]["role"] == "assistant":
            done = True
            prediction = chat_messages[-1]["message"]
            if self.save_predictions and self.output_file:
                # update the task entry in the output file
                add_prediction_to_jsonl(
                    file_path=self.output_file,
                    task_id=self.ab_task_id,
                    prediction=prediction,
                    override_if_exists=True,
                )
            # the has-answer flag returned by the scorer is not used here
            accuracy, _ = question_scorer(prediction, self.gold)
        return accuracy, done, msg, info

BrowserGym/browsergym/assistantbench/src/browsergym/assistantbench/utils.py ADDED Viewed

	@@ -0,0 +1,73 @@

+import json
+import logging
+import os
+import pathlib
+import time
logger = logging.getLogger(__name__)


def add_prediction_to_jsonl(
    file_path: str, task_id: str, prediction: object, override_if_exists: bool
) -> None:
    """
    Add or update one ``{"id": task_id, "answer": prediction}`` record in a JSONL file.

    Access is serialized through a lock file next to ``file_path`` (same name,
    ``.lock`` suffix) that is created atomically. The lock is always released,
    even when reading, updating or writing fails.

    Args:
        file_path: Path of the JSONL file; created if it does not exist.
        task_id: Value stored under ``"id"``; identifies the record.
        prediction: JSON-serializable value stored under ``"answer"``.
        override_if_exists: Whether an existing record for ``task_id`` may be
            overwritten.

    Raises:
        RuntimeError: if the lock could not be acquired within 10 seconds.
        ValueError: if a record for ``task_id`` exists and ``override_if_exists``
            is False.
    """
    lock_file_path = pathlib.Path(file_path).with_suffix(".lock")
    lock_max_wait = 10  # 10 seconds
    # Acquire lock (atomic file creation); monotonic clock is immune to clock changes
    start_time = time.monotonic()
    while True:
        try:
            fd = os.open(lock_file_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        except FileExistsError:
            # give up if max wait time reached
            seconds_waited = time.monotonic() - start_time
            if seconds_waited >= lock_max_wait:
                raise RuntimeError(
                    f"Lock file could not be acquired after {seconds_waited} seconds ({lock_file_path})"
                )
            # wait for lock release
            logger.info(f"Waiting for lock file to be released: {lock_file_path}")
            time.sleep(1)  # 1 sec
        else:
            with os.fdopen(fd, "w") as f:
                f.write("lock")
            break
    logger.info(f"Lock file acquired: {lock_file_path}")
    try:
        # Load existing data, if any
        data = []
        if os.path.exists(file_path):
            with open(file_path, "r") as f:
                data.extend(json.loads(line) for line in f if line.strip())  # Skip empty lines
        # Check if task_id already exists
        existing_record = next((entry for entry in data if entry["id"] == task_id), None)
        # Add or update the record
        if existing_record is None:
            data.append({"id": task_id, "answer": prediction})
        elif override_if_exists:
            existing_record["answer"] = prediction
        else:
            raise ValueError(
                f"Prediction for task ID {repr(task_id)} already exists in file {file_path}."
            )
        # Write data back to the file (this also creates it on first use)
        with open(file_path, "w") as f:
            f.writelines(json.dumps(entry) + "\n" for entry in data)
    finally:
        # Release lock (remove file) on every exit path; a lock left behind would
        # make every later call wait and then fail with RuntimeError.
        os.remove(lock_file_path)
        logger.info(f"Lock file released: {lock_file_path}")

BrowserGym/browsergym/browsergym.egg-info/PKG-INFO ADDED Viewed

	@@ -0,0 +1,22 @@

+Metadata-Version: 2.4
+Name: browsergym
+Version: 0.13.4
+Summary: BrowserGym: a gym environment for web task automation in the Chromium browser
+Author: Rim Assouel, Léo Boisvert, Massimo Caccia, Alex Drouin, Maxime Gasse, Imene Kerboua, Alex Lacoste, Thibault Le Sellier De Chezelles, Tom Marty, Aman Jaiswal
+License: Apache-2.0
+Classifier: Development Status :: 3 - Alpha
+Classifier: Programming Language :: Python :: 3
+Classifier: Operating System :: OS Independent
+Classifier: Intended Audience :: Science/Research
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: License :: OSI Approved :: Apache Software License
+Requires-Python: >3.10
+Description-Content-Type: text/markdown
+Requires-Dist: browsergym-core==0.13.4
+Requires-Dist: browsergym-miniwob==0.13.4
+Requires-Dist: browsergym-webarena==0.13.4
+Requires-Dist: browsergym-visualwebarena==0.13.4
+Requires-Dist: browsergym-assistantbench==0.13.4
+Requires-Dist: browsergym-experiments==0.13.4
+Requires-Dist: browsergym-workarena>=0.4.1
+Requires-Dist: weblinx-browsergym>=0.0.2

BrowserGym/browsergym/browsergym.egg-info/SOURCES.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+pyproject.toml
+browsergym.egg-info/PKG-INFO
+browsergym.egg-info/SOURCES.txt
+browsergym.egg-info/dependency_links.txt
+browsergym.egg-info/requires.txt
+browsergym.egg-info/top_level.txt

BrowserGym/browsergym/browsergym.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+

BrowserGym/browsergym/browsergym.egg-info/requires.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+browsergym-core==0.13.4
+browsergym-miniwob==0.13.4
+browsergym-webarena==0.13.4
+browsergym-visualwebarena==0.13.4
+browsergym-assistantbench==0.13.4
+browsergym-experiments==0.13.4
+browsergym-workarena>=0.4.1
+weblinx-browsergym>=0.0.2

BrowserGym/browsergym/browsergym.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+

BrowserGym/browsergym/core/README.md ADDED Viewed

	@@ -0,0 +1,10 @@

+# BrowserGym core
+This package provides `browsergym.core`, the core functionality of [BrowserGym](https://github.com/ServiceNow/BrowserGym).
+## Setup
+1. Install the package
+```sh
+pip install browsergym-core
+```

BrowserGym/browsergym/core/pyproject.toml ADDED Viewed

	@@ -0,0 +1,42 @@

+[build-system]
+requires = ["hatchling", "hatch-requirements-txt"]
+build-backend = "hatchling.build"
+[project]
+name = "browsergym-core"
+description = "BrowserGym: a gym environment for web task automation in the Chromium browser"
+authors = [
+    {name = "Rim Assouel"},
+    {name = "Léo Boisvert"},
+    {name = "Massimo Caccia"},
+    {name = "Alex Drouin"},
+    {name = "Maxime Gasse"},
+    {name = "Imene Kerboua"},
+    {name = "Alex Lacoste"},
+    {name = "Thibault Le Sellier De Chezelles"},
+    {name = "Tom Marty"},
+]
+readme = "README.md"
+requires-python = ">3.9"
+license = {text = "Apache-2.0"}
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Programming Language :: Python :: 3",
+    "Operating System :: OS Independent",
+    "Intended Audience :: Science/Research",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "License :: OSI Approved :: Apache Software License",
+]
+dynamic = ["dependencies", "version"]
+[project.urls]
+homepage = "https://github.com/ServiceNow/BrowserGym"
+[tool.hatch.version]
+path = "src/browsergym/core/__init__.py"
+[tool.hatch.metadata.hooks.requirements_txt]
+files = ["requirements.txt"]
+[tool.hatch.build.targets.wheel]
+packages = ["src/browsergym"]

BrowserGym/browsergym/core/requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+playwright==1.44
+gymnasium>=0.27
+numpy>=1.14
+pyparsing>=3
+Pillow>=10.1
+beautifulsoup4>=4.12
+lxml>=4.9
+mcp[cli]>=1.6.0

BrowserGym/browsergym/core/src/browsergym/core/__init__.py ADDED Viewed

	@@ -0,0 +1,27 @@

+__version__ = "0.13.4"
+import playwright.sync_api
# we use a global playwright instance
_PLAYWRIGHT = None


def _set_global_playwright(pw: playwright.sync_api.Playwright):
    """Store ``pw`` as the module-wide playwright instance."""
    global _PLAYWRIGHT
    _PLAYWRIGHT = pw
def _get_global_playwright():
    """
    Return the module-wide playwright instance.

    When none is set yet, a new one is started with ``sync_playwright().start()``
    and stored through ``_set_global_playwright`` before being returned.
    """
    if _PLAYWRIGHT:
        return _PLAYWRIGHT
    _set_global_playwright(playwright.sync_api.sync_playwright().start())
    return _PLAYWRIGHT
+# register the open-ended task
+from .registration import register_task
+from .task import OpenEndedTask
+register_task(OpenEndedTask.get_task_id(), OpenEndedTask)

BrowserGym/browsergym/core/src/browsergym/core/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (1.14 kB). View file

BrowserGym/browsergym/core/src/browsergym/core/__pycache__/chat.cpython-311.pyc ADDED Viewed

Binary file (6.89 kB). View file

BrowserGym/browsergym/core/src/browsergym/core/__pycache__/constants.cpython-311.pyc ADDED Viewed

Binary file (428 Bytes). View file

BrowserGym/browsergym/core/src/browsergym/core/__pycache__/env.cpython-311.pyc ADDED Viewed

Binary file (31.2 kB). View file

BrowserGym/browsergym/core/src/browsergym/core/__pycache__/observation.cpython-311.pyc ADDED Viewed

Binary file (22.7 kB). View file

BrowserGym/browsergym/core/src/browsergym/core/__pycache__/registration.cpython-311.pyc ADDED Viewed

Binary file (3.49 kB). View file

BrowserGym/browsergym/core/src/browsergym/core/__pycache__/spaces.cpython-311.pyc ADDED Viewed

Binary file (8.42 kB). View file

BrowserGym/browsergym/core/src/browsergym/core/__pycache__/task.cpython-311.pyc ADDED Viewed

Binary file (5.53 kB). View file

BrowserGym/browsergym/core/src/browsergym/core/action/__init__.py ADDED Viewed

	@@ -0,0 +1,11 @@

# Module-wide demo-mode flag; off by default.
_DEMO_MODE = False


def set_global_demo_mode(demo_mode: bool):
    """Set the module-wide demo-mode flag."""
    global _DEMO_MODE
    _DEMO_MODE = demo_mode


def get_global_demo_mode():
    """Return the current module-wide demo-mode flag."""
    return _DEMO_MODE

BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (561 Bytes). View file

BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/base.cpython-311.pyc ADDED Viewed

Binary file (3.12 kB). View file

BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/functions.cpython-311.pyc ADDED Viewed

Binary file (26.2 kB). View file

BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/highlevel.cpython-311.pyc ADDED Viewed

Binary file (12.4 kB). View file

BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/parsers.cpython-311.pyc ADDED Viewed

Binary file (6.82 kB). View file

BrowserGym/browsergym/core/src/browsergym/core/action/__pycache__/utils.cpython-311.pyc ADDED Viewed

Binary file (12.2 kB). View file

BrowserGym/browsergym/core/src/browsergym/core/action/base.py ADDED Viewed

	@@ -0,0 +1,63 @@

+from abc import ABC, abstractmethod
+import playwright.sync_api
+from . import get_global_demo_mode
class AbstractActionSet(ABC):
    """Abstract interface of an action set; ``strict`` is stored on the instance."""

    def __init__(self, strict: bool = False):
        self.strict = strict

    @abstractmethod
    def describe(self, with_long_description: bool = True, with_examples: bool = True) -> str:
        """
        Return a textual description of this action space.
        """

    @abstractmethod
    def example_action(self, abstract: bool) -> str:
        """
        Return an example action, as a string.
        """

    @abstractmethod
    def to_python_code(self, action) -> str:
        """
        Convert the given action into browsergym-compatible python code.

        Args:
            action: the action to convert.

        Returns:
            Executable python code that performs the action in a browsergym environment.
        """
def execute_python_code(
    code: str,
    page: playwright.sync_api.Page,
    send_message_to_user: callable,
    report_infeasible_instructions: callable,
):
    """
    Run ``code`` with ``exec`` in a fresh global namespace.

    The namespace exposes only ``page``, ``send_message_to_user``,
    ``report_infeasible_instructions`` and ``DEMO_MODE`` (the current global
    demo-mode flag).

    WARNING: this is not safe!
    https://stackoverflow.com/questions/77655440/can-you-protect-a-python-variable-with-exec

    Args:
        code: the Python code to execute, as a string.
        page: the playwright page that will be made accessible to the code.
        send_message_to_user: utility function that will be made accessible to the code. It should take one text argument.
        report_infeasible_instructions: utility function that will be made accessible to the code. It should take one text argument.
    """
    exec_namespace = {}
    exec_namespace["page"] = page
    exec_namespace["send_message_to_user"] = send_message_to_user
    exec_namespace["report_infeasible_instructions"] = report_infeasible_instructions
    exec_namespace["DEMO_MODE"] = get_global_demo_mode()
    exec(code, exec_namespace)

BrowserGym/browsergym/core/src/browsergym/core/action/functions.py ADDED Viewed

	@@ -0,0 +1,624 @@

+# these are placeholders
+# all these symbols will be available in browsergym actions
+from typing import Literal
+import playwright.sync_api
+from .utils import (
+    add_demo_mode_effects,
+    call_fun,
+    get_elem_by_bid,
+    highlight_by_box,
+    smooth_move_visual_cursor_to,
+)
# Placeholder globals referenced by the primitives below.
# NOTE(review): presumably replaced with real values in the context where the
# primitives are executed — confirm against the code that assembles actions.
page: playwright.sync_api.Page = None
send_message_to_user: callable = None
report_infeasible_instructions: callable = None
demo_mode: Literal["off", "default", "all_blue", "only_visible_elements"] = None
retry_with_force: bool = False
+"""IMPORTANT
+The following primitives are meant to be included in the browsergym action using
+inspect.getsource().
+"""
def send_msg_to_user(text: str):
    """
    Sends a message to the user.
    Examples:
        send_msg_to_user("Based on the results of my search, the city was built in 1751.")
    """
    # Thin wrapper around the send_message_to_user callable.
    send_message_to_user(text)
def report_infeasible(reason: str):
    """
    Notifies the user that their instructions are infeasible.
    Examples:
        report_infeasible("I cannot follow these instructions because there is no email field in this form.")
    """
    # Thin wrapper around the report_infeasible_instructions callable.
    report_infeasible_instructions(reason)
def noop(wait_ms: float = 1000):
    """
    Do nothing, and optionally wait for the given time (in milliseconds).
    Examples:
        noop()
        noop(500)
    """
    # The only effect is the wait on the page.
    page.wait_for_timeout(wait_ms)
+# https://playwright.dev/docs/input#text-input
def fill(bid: str, value: str):
    """
    Fill out a form field. It focuses the element and triggers an input event with the entered text.
    It works for <input>, <textarea> and [contenteditable] elements.
    Examples:
        fill('237', 'example value')
        fill('45', "multi-line\\nexample")
        fill('a12', "example with \\"quotes\\"")
    """
    elem = get_elem_by_bid(page, bid, demo_mode != "off")
    add_demo_mode_effects(page, elem, bid, demo_mode=demo_mode, move_cursor=False)

    def do(force: bool):
        if demo_mode != "off":
            # Type visibly: about 2s for the whole text, at least 10ms per key.
            # max(len(value), 1) avoids a ZeroDivisionError for an empty value.
            delay = max(2000 / max(len(value), 1), 10)
            elem.clear(force=force, timeout=500)
            elem.type(value, delay=delay, timeout=0)  # no timeout
        else:
            elem.fill(value, force=force, timeout=500)

    call_fun(do, retry_with_force)
+# https://playwright.dev/python/docs/api/class-locator#locator-check
def check(bid: str):
    """
    Ensure a checkbox or radio element is checked.
    Examples:
        check('55')
    """
    target = get_elem_by_bid(page, bid, demo_mode != "off")
    add_demo_mode_effects(page, target, bid, demo_mode=demo_mode, move_cursor=True)

    def attempt(force: bool):
        target.check(force=force, timeout=500)

    call_fun(attempt, retry_with_force)
+# https://playwright.dev/python/docs/api/class-locator#locator-uncheck
def uncheck(bid: str):
    """
    Ensure a checkbox or radio element is unchecked.
    Examples:
        uncheck('a5289')
    """
    target = get_elem_by_bid(page, bid, demo_mode != "off")
    add_demo_mode_effects(page, target, bid, demo_mode=demo_mode, move_cursor=True)

    def attempt(force: bool):
        target.uncheck(force=force, timeout=500)

    call_fun(attempt, retry_with_force)
+# https://playwright.dev/docs/input#select-options
def select_option(bid: str, options: str | list[str]):
    """
    Select one or multiple options in a <select> element. You can specify
    option value or label to select. Multiple options can be selected.
    Examples:
        select_option('a48', "blue")
        select_option('c48', ["red", "green", "blue"])
    """
    target = get_elem_by_bid(page, bid, demo_mode != "off")
    add_demo_mode_effects(page, target, bid, demo_mode=demo_mode, move_cursor=False)

    def attempt(force: bool):
        target.select_option(options, force=force, timeout=500)

    call_fun(attempt, retry_with_force)
+# https://playwright.dev/python/docs/api/class-locator#locator-click
def click(
    bid: str,
    button: Literal["left", "middle", "right"] = "left",
    modifiers: list[Literal["Alt", "Control", "ControlOrMeta", "Meta", "Shift"]] = [],
):
    """
    Click an element.
    Examples:
        click('a51')
        click('b22', button="right")
        click('48', button="middle", modifiers=["Shift"])
    """
    target = get_elem_by_bid(page, bid, demo_mode != "off")
    add_demo_mode_effects(page, target, bid, demo_mode=demo_mode, move_cursor=True)

    def attempt(force: bool):
        target.click(button=button, modifiers=modifiers, force=force, timeout=500)

    call_fun(attempt, retry_with_force)
+# https://playwright.dev/python/docs/api/class-locator#locator-dblclick
def dblclick(
    bid: str,
    button: Literal["left", "middle", "right"] = "left",
    modifiers: list[Literal["Alt", "Control", "ControlOrMeta", "Meta", "Shift"]] = [],
):
    """
    Double click an element.
    Examples:
        dblclick('12')
        dblclick('ca42', button="right")
        dblclick('178', button="middle", modifiers=["Shift"])
    """
    elem = get_elem_by_bid(page, bid, demo_mode != "off")
    add_demo_mode_effects(page, elem, bid, demo_mode=demo_mode, move_cursor=True)

    def do(force: bool):
        # Must be the locator's dblclick(): click() would only click once.
        elem.dblclick(button=button, modifiers=modifiers, force=force, timeout=500)

    call_fun(do, retry_with_force)
+# https://playwright.dev/python/docs/api/class-locator#locator-hover
def hover(bid: str):
    """
    Hover over an element.
    Examples:
        hover('b8')
    """
    target = get_elem_by_bid(page, bid, demo_mode != "off")
    add_demo_mode_effects(
        page,
        target,
        bid,
        demo_mode=demo_mode,
        move_cursor=True,
        highlight_box=False,
    )

    def attempt(force: bool):
        target.hover(force=force, timeout=500)

    call_fun(attempt, retry_with_force)
+# https://playwright.dev/python/docs/input#keys-and-shortcuts
def press(bid: str, key_comb: str):
    """
    Focus the matching element and press a combination of keys. It accepts
    the logical key names that are emitted in the keyboardEvent.key property
    of the keyboard events: Backquote, Minus, Equal, Backslash, Backspace,
    Tab, Delete, Escape, ArrowDown, End, Enter, Home, Insert, PageDown, PageUp,
    ArrowRight, ArrowUp, F1 - F12, Digit0 - Digit9, KeyA - KeyZ, etc. You can
    alternatively specify a single character you'd like to produce such as "a"
    or "#". Following modification shortcuts are also supported: Shift, Control,
    Alt, Meta, ShiftLeft, ControlOrMeta. ControlOrMeta resolves to Control on
    Windows and Linux and to Meta on macOS.
    Examples:
        press('88', 'Backspace')
        press('a26', 'ControlOrMeta+a')
        press('a61', 'Meta+Shift+t')
    """
    target = get_elem_by_bid(page, bid, demo_mode != "off")
    add_demo_mode_effects(page, target, bid, demo_mode=demo_mode, move_cursor=False)
    # Called directly, without the call_fun retry wrapper.
    target.press(key_comb, timeout=500)
+# https://playwright.dev/python/docs/api/class-locator#locator-focus
def focus(bid: str):
    """
    Focus the matching element.
    Examples:
        focus('b455')
    """
    target = get_elem_by_bid(page, bid, demo_mode != "off")
    add_demo_mode_effects(page, target, bid, demo_mode=demo_mode, move_cursor=False)
    # Called directly, without the call_fun retry wrapper.
    target.focus(timeout=500)
+# https://playwright.dev/python/docs/api/class-locator#locator-clear
def clear(bid: str):
    """
    Clear the input field.
    Examples:
        clear('996')
    """
    target = get_elem_by_bid(page, bid, demo_mode != "off")
    add_demo_mode_effects(page, target, bid, demo_mode=demo_mode, move_cursor=False)
    # Called directly, without the call_fun retry wrapper.
    target.clear(timeout=500)
+# https://playwright.dev/python/docs/input#drag-and-drop
def drag_and_drop(from_bid: str, to_bid: str):
    """
    Perform a drag & drop. Hover the element that will be dragged. Press
    left mouse button. Move mouse to the element that will receive the
    drop. Release left mouse button.
    Examples:
        drag_and_drop('56', '498')
    """
    show_effects = demo_mode != "off"

    # Grab: hover the source element, then press the mouse button.
    source = get_elem_by_bid(page, from_bid, show_effects)
    add_demo_mode_effects(page, source, from_bid, demo_mode=demo_mode, move_cursor=True)
    source.hover(timeout=500)
    page.mouse.down()

    # Drop: hover the destination element, then release the mouse button.
    destination = get_elem_by_bid(page, to_bid, show_effects)
    add_demo_mode_effects(page, destination, to_bid, demo_mode=demo_mode, move_cursor=True)
    destination.hover(timeout=500)
    page.mouse.up()
+# https://playwright.dev/python/docs/api/class-mouse#mouse-wheel
def scroll(delta_x: float, delta_y: float):
    """
    Scroll horizontally and vertically. Amounts in pixels, positive for right or down scrolling, negative for left or up scrolling. Dispatches a wheel event.
    Examples:
        scroll(0, 200)
        scroll(-50.2, -100.5)
    """
    # Both deltas go straight to the page's mouse wheel.
    page.mouse.wheel(delta_x, delta_y)
+# https://playwright.dev/python/docs/api/class-mouse#mouse-move
def mouse_move(x: float, y: float):
    """
    Move the mouse to a location. Uses absolute client coordinates in pixels.
    Dispatches a mousemove event.
    Examples:
        mouse_move(65.2, 158.5)
    """
    show_effects = demo_mode != "off"
    if show_effects:
        # Animate the visual cursor before the real move.
        smooth_move_visual_cursor_to(page, x, y)
    page.mouse.move(x, y)
+# https://playwright.dev/python/docs/api/class-mouse#mouse-up
+def mouse_up(x: float, y: float, button: Literal["left", "middle", "right"] = "left"):
+    """
+    Move the mouse to a location then release a mouse button. Dispatches
+    mousemove and mouseup events.
+    Examples:
+        mouse_up(250, 120)
+        mouse_up(47, 252, 'right')
+    """
+    if demo_mode != "off":
+        smooth_move_visual_cursor_to(page, x, y)
+        highlight_by_box(page, {"x": x, "y": y, "width": 1, "height": 1})
+    page.mouse.move(x, y)
+    page.mouse.up(button=button)
+# https://playwright.dev/python/docs/api/class-mouse#mouse-down
+def mouse_down(x: float, y: float, button: Literal["left", "middle", "right"] = "left"):
+    """
+    Move the mouse to a location then press and hold a mouse button. Dispatches
+    mousemove and mousedown events.
+    Examples:
+        mouse_down(140.2, 580.1)
+        mouse_down(458, 254.5, 'middle')
+    """
+    if demo_mode != "off":
+        smooth_move_visual_cursor_to(page, x, y)
+        highlight_by_box(page, {"x": x, "y": y, "width": 1, "height": 1})
+    page.mouse.move(x, y)
+    page.mouse.down(button=button)
+# https://playwright.dev/python/docs/api/class-mouse#mouse-click
+def mouse_click(x: float, y: float, button: Literal["left", "middle", "right"] = "left"):
+    """
+    Move the mouse to a location and click a mouse button. Dispatches mousemove,
+    mousedown and mouseup events.
+    Examples:
+        mouse_click(887.2, 68)
+        mouse_click(56, 712.56, 'right')
+    """
+    if demo_mode != "off":
+        smooth_move_visual_cursor_to(page, x, y)
+        highlight_by_box(page, {"x": x, "y": y, "width": 1, "height": 1})
+    page.mouse.click(x, y, button=button)
+# https://playwright.dev/python/docs/api/class-mouse#mouse-dblclick
+def mouse_dblclick(x: float, y: float, button: Literal["left", "middle", "right"] = "left"):
+    """
+    Move the mouse to a location and double click a mouse button. Dispatches
+    mousemove, mousedown and mouseup events.
+    Examples:
+        mouse_dblclick(5, 236)
+        mouse_dblclick(87.5, 354, 'right')
+    """
+    if demo_mode != "off":
+        smooth_move_visual_cursor_to(page, x, y)
+        highlight_by_box(page, {"x": x, "y": y, "width": 1, "height": 1})
+    page.mouse.dblclick(x, y, button=button)
+def mouse_drag_and_drop(from_x: float, from_y: float, to_x: float, to_y: float):
+    """
+    Drag and drop from a location to a location. Uses absolute client
+    coordinates in pixels. Dispatches mousemove, mousedown and mouseup
+    events.
+    Examples:
+        mouse_drag_and_drop(10.7, 325, 235.6, 24.54)
+    """
+    if demo_mode != "off":
+        x, y = from_x, from_y
+        smooth_move_visual_cursor_to(page, x, y)
+        highlight_by_box(page, {"x": x, "y": y, "width": 1, "height": 1})
+    page.mouse.move(from_x, from_y)
+    page.mouse.down()
+    if demo_mode != "off":
+        x, y = to_x, to_y
+        smooth_move_visual_cursor_to(page, x, y)
+        highlight_by_box(page, {"x": x, "y": y, "width": 1, "height": 1})
+    page.mouse.move(to_x, to_y)
+    page.mouse.up()
+# https://playwright.dev/python/docs/api/class-keyboard#keyboard-press
+def keyboard_press(key: str):
+    """
+    Press a combination of keys. Accepts the logical key names that are
+    emitted in the keyboardEvent.key property of the keyboard events:
+    Backquote, Minus, Equal, Backslash, Backspace, Tab, Delete, Escape,
+    ArrowDown, End, Enter, Home, Insert, PageDown, PageUp, ArrowRight,
+    ArrowUp, F1 - F12, Digit0 - Digit9, KeyA - KeyZ, etc. You can
+    alternatively specify a single character you'd like to produce such
+    as "a" or "#". Following modification shortcuts are also supported:
+    Shift, Control, Alt, Meta, ShiftLeft, ControlOrMeta. ControlOrMeta
+    resolves to Control on Windows and Linux and to Meta on macOS.
+    Examples:
+        keyboard_press('Backspace')
+        keyboard_press('ControlOrMeta+a')
+        keyboard_press('Meta+Shift+t')
+        page.keyboard.press("PageDown")
+    """
+    page.keyboard.press(key)
+# https://playwright.dev/python/docs/api/class-keyboard#keyboard-up
+def keyboard_up(key: str):
+    """
+    Release a keyboard key. Dispatches a keyup event. Accepts the logical
+    key names that are emitted in the keyboardEvent.key property of the
+    keyboard events: Backquote, Minus, Equal, Backslash, Backspace, Tab,
+    Delete, Escape, ArrowDown, End, Enter, Home, Insert, PageDown, PageUp,
+    ArrowRight, ArrowUp, F1 - F12, Digit0 - Digit9, KeyA - KeyZ, etc.
+    You can alternatively specify a single character you'd like to produce
+    such as "a" or "#".
+    Examples:
+        keyboard_up('Shift')
+        keyboard_up('c')
+    """
+    page.keyboard.up(key)
+# https://playwright.dev/python/docs/api/class-keyboard#keyboard-down
+def keyboard_down(key: str):
+    """
+    Press and holds a keyboard key. Dispatches a keydown event. Accepts the
+    logical key names that are emitted in the keyboardEvent.key property of
+    the keyboard events: Backquote, Minus, Equal, Backslash, Backspace, Tab,
+    Delete, Escape, ArrowDown, End, Enter, Home, Insert, PageDown, PageUp,
+    ArrowRight, ArrowUp, F1 - F12, Digit0 - Digit9, KeyA - KeyZ, etc. You can
+    alternatively specify a single character such as "a" or "#".
+    Examples:
+        keyboard_up('Shift')
+        keyboard_up('c')
+    """
+    page.keyboard.down(key)
+# https://playwright.dev/python/docs/api/class-keyboard#keyboard-type
+def keyboard_type(text: str):
+    """
+    Types a string of text through the keyboard. Sends a keydown, keypress/input,
+    and keyup event for each character in the text. Modifier keys DO NOT affect
+    keyboard_type. Holding down Shift will not type the text in upper case.
+    Examples:
+        keyboard_type('Hello world!')
+    """
+    if demo_mode != "off":
+        delay = max(2000 / len(text), 10)
+    else:
+        delay = None
+    page.keyboard.type(text, delay=delay)
+# https://playwright.dev/python/docs/api/class-keyboard#keyboard-insert-text
+def keyboard_insert_text(text: str):
+    """
+    Insert a string of text in the currently focused element. Dispatches only input
+    event, does not emit the keydown, keyup or keypress events. Modifier keys DO NOT
+    affect keyboard_insert_text. Holding down Shift will not type the text in upper
+    case.
+    Examples:
+        keyboard_insert_text('Hello world!')
+    """
+    page.keyboard.insert_text(text)
+# https://playwright.dev/python/docs/api/class-page#page-goto
+def goto(url: str):
+    """
+    Navigate to a url.
+    Examples:
+        goto('http://www.example.com')
+    """
+    page.goto(url)
+# https://playwright.dev/python/docs/api/class-page#page-go-back
+def go_back():
+    """
+    Navigate to the previous page in history.
+    Examples:
+        go_back()
+    """
+    page.go_back()
+# https://playwright.dev/python/docs/api/class-page#page-go-forward
+def go_forward():
+    """
+    Navigate to the next page in history.
+    Examples:
+        go_forward()
+    """
+    page.go_forward()
+# https://playwright.dev/python/docs/api/class-browsercontext#browser-context-new-page
+def new_tab():
+    """
+    Open a new tab. It will become the active one.
+    Examples:
+        new_tab()
+    """
+    global page
+    # set the new page as the active page
+    page = page.context.new_page()
+    # trigger the callback that sets this page as active in browsergym
+    page.evaluate(
+        """\
+const event = new Event('pageshow', {
+    bubbles: true,  // Whether the event bubbles up through the DOM or not
+    cancelable: false  // Whether the event can be canceled
+});
+window.dispatchEvent(event);
+"""
+    )
+# https://playwright.dev/python/docs/api/class-page#page-close
+def tab_close():
+    """
+    Close the current tab.
+    Examples:
+        tab_close()
+    """
+    global page
+    context = page.context
+    page.close()
+    # set most recent page as active page, or open a new page if needed
+    if context.pages:
+        # TODO: do something more elaborate? (active page history)
+        page = context.pages[-1]
+    else:
+        page = context.new_page()
+    # trigger the callback that sets this page as active in browsergym
+    page.evaluate(
+        """\
+const event = new Event('pageshow', {
+    bubbles: true,  // Whether the event bubbles up through the DOM or not
+    cancelable: false  // Whether the event can be canceled
+});
+window.dispatchEvent(event);
+"""
+    )
+# https://playwright.dev/python/docs/api/class-page#page-bring-to-front
+def tab_focus(index: int):
+    """
+    Bring tab to front (activate tab).
+    Examples:
+        tab_focus(2)
+    """
+    global page  # set the focused page as the active page
+    page = page.context.pages[index]
+    page.bring_to_front()
+    # trigger the callback that sets this page as active in browsergym
+    page.evaluate(
+        """\
+const event = new Event('pageshow', {
+    bubbles: true,  // Whether the event bubbles up through the DOM or not
+    cancelable: false  // Whether the event can be canceled
+});
+window.dispatchEvent(event);
+"""
+    )
+# https://playwright.dev/python/docs/input#upload-files
+def upload_file(bid: str, file: str | list[str]):
+    """
+    Click an element and wait for a "filechooser" event, then select one
+    or multiple input files for upload. Relative file paths are resolved
+    relative to the current working directory. An empty list clears the
+    selected files.
+    Examples:
+        upload_file("572", "my_receipt.pdf")
+        upload_file("63", ["/home/bob/Documents/image.jpg", "/home/bob/Documents/file.zip"])
+    """
+    elem = get_elem_by_bid(page, bid, demo_mode != "off")
+    add_demo_mode_effects(page, elem, bid, demo_mode=demo_mode, move_cursor=True)
+    with page.expect_file_chooser() as fc_info:
+        elem.click(timeout=500)
+    file_chooser = fc_info.value
+    file_chooser.set_files(file)
+# https://playwright.dev/python/docs/input#upload-files
+def mouse_upload_file(x: float, y: float, file: str | list[str]):
+    """
+    Click a location and wait for a "filechooser" event, then select one
+    or multiple input files for upload. Relative file paths are resolved
+    relative to the current working directory. An empty list clears the
+    selected files.
+    Examples:
+        mouse_upload_file(132.1, 547, "my_receipt.pdf")
+        mouse_upload_file(328, 812, ["/home/bob/Documents/image.jpg", "/home/bob/Documents/file.zip"])
+    """
+    if demo_mode != "off":
+        smooth_move_visual_cursor_to(page, x, y)
+        highlight_by_box(page, {"x": x, "y": y, "width": 1, "height": 1})
+    with page.expect_file_chooser() as fc_info:
+        page.mouse.click(x, y)
+    file_chooser = fc_info.value
+    file_chooser.set_files(file)

BrowserGym/browsergym/core/src/browsergym/core/action/highlevel.py ADDED Viewed

	@@ -0,0 +1,522 @@

+import inspect
+import random
+import typing
+from dataclasses import dataclass
+from . import utils
+from .base import AbstractActionSet
+from .functions import (  # check,; uncheck,
+    clear,
+    click,
+    dblclick,
+    drag_and_drop,
+    fill,
+    focus,
+    go_back,
+    go_forward,
+    goto,
+    hover,
+    keyboard_down,
+    keyboard_insert_text,
+    keyboard_press,
+    keyboard_type,
+    keyboard_up,
+    mouse_click,
+    mouse_dblclick,
+    mouse_down,
+    mouse_drag_and_drop,
+    mouse_move,
+    mouse_up,
+    mouse_upload_file,
+    new_tab,
+    noop,
+    press,
+    report_infeasible,
+    scroll,
+    select_option,
+    send_msg_to_user,
+    tab_close,
+    tab_focus,
+    upload_file,
+)
+from .parsers import action_docstring_parser, highlevel_action_parser
+# Named groups of action functions. HighLevelActionSet looks subset names up in this
+# mapping and concatenates the matching lists; the order inside each list is kept
+# (duplicates across subsets are dropped later, first occurrence wins).
+ACTION_SUBSETS = {
+    "chat": [send_msg_to_user],
+    "infeas": [report_infeasible],
+    "bid": [
+        scroll,
+        fill,
+        # These are not really needed and might pollute the action space, doing more harm than good
+        # check,
+        # uncheck,
+        select_option,
+        click,
+        dblclick,
+        hover,
+        press,
+        focus,
+        clear,
+        drag_and_drop,
+        upload_file,
+    ],
+    "coord": [
+        scroll,
+        mouse_move,
+        mouse_up,
+        mouse_down,
+        mouse_click,
+        mouse_dblclick,
+        mouse_drag_and_drop,
+        mouse_upload_file,
+        keyboard_down,
+        keyboard_up,
+        keyboard_press,
+        keyboard_type,
+        keyboard_insert_text,
+    ],
+    "nav": [go_back, go_forward, goto],
+    "tab": [
+        tab_close,
+        tab_focus,
+        new_tab,
+    ],
+    # adapted from MiniWoB repo
+    # https://github.com/Farama-Foundation/miniwob-plusplus/blob/1bab0dffe34e92cc1049fe9443542029bf7e44a9/miniwob/action.py#L122
+    "miniwob_all": [
+        mouse_move,  #     MOVE_COORDS
+        mouse_click,  #    CLICK_COORDS
+        mouse_dblclick,  # DBLCLICK_COORDS
+        mouse_down,  #     MOUSEDOWN_COORDS
+        mouse_up,  #       MOUSEUP_COORDS
+        scroll,  #         SCROLL_UP_COORDS, SCROLL_DOWN_COORDS
+        click,  #          CLICK_ELEMENT
+        keyboard_press,  # PRESS_KEY
+        keyboard_type,  #  TYPE_TEXT (and substitute for TYPE_FIELD)
+        fill,  #           FOCUS_ELEMENT_AND_TYPE_TEXT (and substitute for FOCUS_ELEMENT_AND_TYPE_FIELD)
+    ],
+    # adapted from MiniWoB repo
+    # https://github.com/Farama-Foundation/miniwob-plusplus/blob/1bab0dffe34e92cc1049fe9443542029bf7e44a9/miniwob/action.py#L142
+    "miniwob_shi17": [
+        mouse_click,  #    CLICK_COORDS
+        mouse_dblclick,  # DBLCLICK_COORDS
+        mouse_down,  #     MOUSEDOWN_COORDS
+        mouse_up,  #       MOUSEUP_COORDS
+        scroll,  #         SCROLL_UP_COORDS, SCROLL_DOWN_COORDS
+        keyboard_press,  # PRESS_KEY
+    ],
+    # adapted from MiniWoB repo
+    # https://github.com/Farama-Foundation/miniwob-plusplus/blob/1bab0dffe34e92cc1049fe9443542029bf7e44a9/miniwob/action.py#L160
+    "miniwob_liu18": [
+        click,  # CLICK_ELEMENT
+        fill,  #  substitute for FOCUS_ELEMENT_AND_TYPE_FIELD
+    ],
+    # adapted from MiniWoB repo
+    # https://github.com/Farama-Foundation/miniwob-plusplus/blob/1bab0dffe34e92cc1049fe9443542029bf7e44a9/miniwob/action.py#L173
+    "miniwob_humphreys22": [
+        mouse_move,  #     MOVE_COORDS
+        mouse_click,  #    CLICK_COORDS
+        mouse_dblclick,  # DBLCLICK_COORDS
+        mouse_down,  #     MOUSEDOWN_COORDS
+        mouse_up,  #       MOUSEUP_COORDS
+        scroll,  #         SCROLL_UP_COORDS, SCROLL_DOWN_COORDS
+        keyboard_press,  # PRESS_KEY
+        keyboard_type,  #  substitute for TYPE_FIELD
+    ],
+    # from the webarena paper
+    # https://arxiv.org/abs/2307.13854
+    # from the webarena source code
+    # https://github.com/web-arena-x/webarena/blob/e31c190c9b43f63e5724322b847e00249300df40/browser_env/actions.py#L240
+    # from the webarena default prompt
+    # https://github.com/web-arena-x/webarena/blob/e31c190c9b43f63e5724322b847e00249300df40/agent/prompts/raw/p_cot_id_actree_2s.py#L13
+    "webarena": [
+        #                   #     code      |      paper       |      prompt
+        scroll,  #            SCROLL        | scroll(dir)      | scroll [down|up]
+        keyboard_press,  #    KEY_PRESS     | press(key_comb)  | press [key_comb]
+        #                     MOUSE_CLICK   |                  |
+        #                     KEYBOARD_TYPE |                  |
+        #                     MOUSE_HOVER   |                  |
+        click,  #             CLICK         | click(elem)      | click [id]
+        fill,  #              TYPE          | type(elem, text) | type [id] [content]
+        hover,  #             HOVER         | hover(elem)      | hover [id]
+        tab_focus,  #         PAGE_FOCUS    | tab_focus(index) | tab_focus [tab_index]
+        new_tab,  #           NEW_TAB       | new_tab()        | new_tab
+        go_back,  #           GO_BACK       | go_back()        | go_back
+        go_forward,  #        GO_FORWARD    | go_forward()     | go_forward
+        goto,  #              GOTO_URL      | goto(url)        | goto [url]
+        tab_close,  #         PAGE_CLOSE    | tab_close()      | close_tab
+        #                     CHECK         |                  |
+        select_option,  #     SELECT_OPTION |                  |
+        send_msg_to_user,  #  STOP          | stop(answer)     | stop [answer]
+        report_infeasible,  ## explicit unachievable action, equivalent STOP "N/A"
+    ],
+    # from the visualwebarena paper
+    # https://arxiv.org/abs/2401.13649
+    # from the visualwebarena source code
+    # https://github.com/web-arena-x/visualwebarena/blob/15890922c97a8694e366fde2d7de8dbd1ff63fb5/browser_env/actions.py#L311-L343
+    # from the visualwebarena default prompt
+    # https://github.com/web-arena-x/visualwebarena/blob/15890922c97a8694e366fde2d7de8dbd1ff63fb5/agent/prompts/jsons/p_cot_id_actree_3s.json#L2
+    "visualwebarena": [
+        #                   #     code      |      paper       |      prompt
+        scroll,  #            SCROLL        | scroll(dir)      | scroll [down|up]
+        keyboard_press,  #    KEY_PRESS     | press(key_comb)  | press [key_comb]
+        #                     MOUSE_CLICK   |                  |
+        #                     KEYBOARD_TYPE |                  |
+        #                     MOUSE_HOVER   |                  |
+        click,  #             CLICK         | click(elem)      | click [id]
+        fill,  #              TYPE          | type(elem, text) | type [id] [content]
+        hover,  #             HOVER         | hover(elem)      | hover [id]
+        tab_focus,  #         PAGE_FOCUS    | tab_focus(index) | tab_focus [tab_index]
+        new_tab,  #           NEW_TAB       | new_tab()        | new_tab
+        go_back,  #           GO_BACK       | go_back()        | go_back
+        go_forward,  #        GO_FORWARD    | go_forward()     | go_forward
+        goto,  #              GOTO_URL      | goto(url)        | goto [url]
+        tab_close,  #         PAGE_CLOSE    | tab_close()      | close_tab
+        #                     CHECK         |                  |
+        select_option,  #     SELECT_OPTION |                  |
+        send_msg_to_user,  #  STOP          | stop(answer)     | stop [answer]
+        #                     CLEAR         |                  |
+        upload_file,  #       UPLOAD        |                  |
+        report_infeasible,  ## explicit unachievable action, equivalent STOP "N/A"
+    ],
+    # from workarena paper
+    # https://arxiv.org/abs/2403.07718
+    "workarena": [
+        scroll,
+        fill,
+        select_option,
+        click,
+        dblclick,
+        hover,
+        press,
+        focus,
+        clear,
+        drag_and_drop,
+        send_msg_to_user,
+    ],
+    # from workarena++ paper
+    # https://arxiv.org/abs/2407.05291
+    "workarena++": [
+        scroll,
+        fill,
+        select_option,
+        click,
+        dblclick,
+        hover,
+        press,
+        focus,
+        clear,
+        drag_and_drop,
+        tab_focus,
+        new_tab,
+        tab_close,
+        go_back,
+        go_forward,
+        goto,
+        send_msg_to_user,
+        report_infeasible,
+    ],
+    # from weblinx_browsergym
+    # https://github.com/McGill-NLP/agentlab-weblinx-mvp/blob/a91b6d19870c5187d252e70a2e2013511cc6f1d2/weblinx_browsergym/__init__.py#L274-L286
+    "weblinx": [
+        send_msg_to_user,  # say(speaker="assistant", utterance=[str]) -> send_msg_to_user(text=[str])
+        click,  # click(uid=[element id]) -> click(bid=[element id])
+        hover,  # hover(uid=[element id]) -> hover(bid=[element id])
+        fill,  # textinput(uid=[element id], value=[str]) -> fill(bid=[element id], value=[str])
+        # change(uid=[element], value=[str]) -> ❌
+        goto,  # load(url=[link]) -> goto(url=[link])
+        # submit(uid=[element]) -> click(bid=[element id])
+        scroll,  # scroll(x=[int x],y=[int y]) -> scroll(delta_x=[int x], delta_y=[int y])
+        # copy(uid=[element],text=[str]) -> ❌
+        # paste(uid=[element],text=[str]) -> ❌
+        new_tab,  # tabcreate() -> new_tab()
+        tab_close,  # tabremove(target=[tabId]) -> tab_close()
+        tab_focus,  # tabswitch(origin=[origin tabId],target=[target tabId]) -> tab_focus(index=[target tabid])
+    ],
+    # from assistantbench paper
+    # https://arxiv.org/abs/2407.15711
+    "assistantbench": [
+        scroll,  # SCROLL
+        fill,  # TYPE
+        select_option,  # SELECT
+        click,  # CLICK
+        press,  # PRESS ENTER
+        go_back,  # GOBACK
+        goto,  # GOTO, SEARCH
+        send_msg_to_user,  # TERMINATE
+    ],
+}
+@dataclass
+class HighLevelAction:
+    # Text-only description of one action, stored in HighLevelActionSet.action_set
+    # under the action function's name.
+    # entrypoint: callable
+    signature: str  # function name followed by its inspect.signature() text
+    description: str  # docstring words found before "Examples:", joined with single spaces
+    examples: list[str]  # example calls rebuilt from the docstring's "Examples:" section
+class HighLevelActionSet(AbstractActionSet):
+    # static class variables
+    ActionSubset = typing.Literal[
+        "chat",
+        "infeas",
+        "bid",
+        "coord",
+        "nav",
+        "tab",
+        "miniwob_all",
+        "miniwob_shi17",
+        "miniwob_liu18",
+        "miniwob_humphreys22",
+        "webarena",
+        "visualwebarena",
+        "workarena",
+        "workarena++",
+        "weblinx",
+        "assistantbench",
+        "custom",
+    ]
+    DemoMode = typing.Literal["off", "default", "all_blue", "only_visible_elements"]
+    def __init__(
+        self,
+        subsets: typing.Optional[ActionSubset | list[ActionSubset]] = [
+            "chat",
+            "infeas",
+            "bid",
+            "nav",
+            "tab",
+        ],
+        custom_actions: typing.Optional[list[callable]] = None,
+        multiaction: bool = True,
+        demo_mode: typing.Optional[DemoMode] = None,
+        strict: bool = False,
+        retry_with_force: bool = False,
+    ):
+        super().__init__(strict)
+        self.multiaction = multiaction
+        self.demo_mode = demo_mode
+        self.retry_with_force = retry_with_force
+        if not subsets:
+            raise ValueError(f"'action_subsets' is empty.")
+        if isinstance(subsets, str):
+            subsets = [subsets]
+        allowed_actions = [noop]  # the noop action is always allowed
+        # add actions from specified action sets
+        if subsets:
+            for subset in subsets:
+                if subset in ACTION_SUBSETS:
+                    allowed_actions.extend(ACTION_SUBSETS[subset])
+                elif subset == "custom":
+                    if not custom_actions:
+                        raise ValueError(
+                            "'custom' is in 'action_subsets' but 'custom_actions' is empty."
+                        )
+                    allowed_actions.extend(custom_actions)
+                else:
+                    raise ValueError(f"Unknown high-level action subspace: {subset}")
+        # like set() but preserves order
+        # https://stackoverflow.com/questions/1653970/does-python-have-an-ordered-set
+        allowed_actions = list(dict.fromkeys(allowed_actions).keys())
+        # parse the actions and build the action space
+        self.action_set: dict[str, HighLevelAction] = {}
+        self.python_includes = ""
+        # include playwright imports
+        self.python_includes += f"""\
+import playwright.sync_api
+from typing import Literal
+"""
+        # set demo_mode and retry_with_force flags
+        self.python_includes += f"""\
+demo_mode={repr(demo_mode)}
+retry_with_force={repr(retry_with_force)}
+if demo_mode is None:
+    demo_mode = "default" if DEMO_MODE else "off"
+"""
+        # include utility functions
+        for _, func in inspect.getmembers(utils, inspect.isfunction):
+            self.python_includes += f"""\
+{inspect.getsource(func)}
+"""
+        # parse and include action functions
+        for func in allowed_actions:
+            # include action function definition in the code
+            self.python_includes += f"""\
+{inspect.getsource(func)}
+"""
+            # extract action signature
+            signature = f"{func.__name__}{inspect.signature(func)}"
+            # parse docstring
+            description, examples = action_docstring_parser.parse_string(func.__doc__)
+            # reconstruct action description
+            description = " ".join(description)
+            # reconstruct action examples
+            examples = [
+                function_name + "(" + ", ".join([repr(arg) for arg in function_args]) + ")"
+                for function_name, function_args in examples
+            ]
+            if func.__name__ in self.action_set:
+                raise ValueError(f"Duplicated action '{func.__name__}'")
+            self.action_set[func.__name__] = HighLevelAction(
+                # entrypoint=func,
+                signature=signature,
+                description=description,
+                examples=examples,
+            )
+    def example_action(self, abstract: bool, max_examples: int = 3) -> str:
+        """
+        Returns an example action as a string.
+        """
+        if abstract:
+            if self.multiaction:
+                return """\
+One or several actions, separated by new lines."""
+            else:
+                return """\
+One single action to be executed. You can only use one action at a time."""
+        else:
+            picked_examples = []
+            # use fill and click examples if action is present
+            for action_name in ["fill", "click", "mouse_click", "keyboard_type"]:
+                if action_name in self.action_set:
+                    picked_examples.extend(self.action_set[action_name].examples)
+            # last resort, use all action examples
+            if not picked_examples:
+                for _, action in self.action_set.items():
+                    picked_examples += action.examples
+            # shuffle examples
+            rng = random.Random(1)
+            rng.shuffle(picked_examples)
+            if self.multiaction:
+                return "\n".join(picked_examples[:max_examples])
+            else:
+                return picked_examples[0]
+    def describe(self, with_long_description: bool = True, with_examples: bool = True):
+        """
+        Returns a textual description of this action space.
+        """
+        description = f"""
+{len(self.action_set)} different types of actions are available.
+"""
+        for _, action in self.action_set.items():
+            description += f"""\
+{action.signature}
+"""
+            if with_long_description:
+                description += f"""\
+    Description: {action.description}
+"""
+            if with_examples and action.examples:
+                description += f"""\
+    Examples:
+"""
+                for example in action.examples:
+                    description += f"""\
+        {example}
+"""
+        if self.multiaction:
+            description += f"""\
+Multiple actions can be provided at once, but will be executed sequentially without any feedback from the page.
+More than 2-3 actions usually leads to failure or unexpected behavior."""
+        else:
+            description += f"""\
+Only a single action can be provided at once."""
+        example_action = self.example_action(abstract=False)
+        if example_action:
+            description += f""" Example:
+{example_action}
+"""
+        else:
+            description += f"""\
+"""
+        return description
+    def to_python_code(self, action):
+        """
+        Converts the given high-level action string to browsergym-compatible python code.
+        Args:
+            action: the high-level action to parse.
+        Returns:
+            Executable python code that performs the action in a browsergym environment.
+        """
+        highlevel_code = action
+        # do the actual parsing and convert each high-level action to
+        # the corresponding python function call
+        if self.strict:
+            function_calls = highlevel_action_parser.parse_string(highlevel_code, parse_all=True)
+            function_calls = function_calls.as_list()
+        else:
+            function_calls = highlevel_action_parser.search_string(
+                highlevel_code
+            )  # allow for multiple matches, skip anything in-between
+            function_calls = sum(function_calls.as_list(), [])  # unpack multiple matches
+        if not function_calls:
+            raise ValueError("Received an empty action.")
+        elif len(function_calls) > 1 and not self.multiaction:
+            raise ValueError("Received a multi-action, only single-actions are allowed.")
+        python_code = ""
+        # function definitions
+        python_code += self.python_includes
+        # function calls
+        for function_name, function_args in function_calls:
+            if function_name not in self.action_set:
+                raise NameError(f"Invalid action type '{function_name}'.")
+            python_code += (
+                function_name + "(" + ", ".join([repr(arg) for arg in function_args]) + ")\n"
+            )
+        # return the constructed python code
+        return python_code
+# consistency checks
+assert "custom" not in ACTION_SUBSETS
+assert set(typing.get_args(HighLevelActionSet.ActionSubset)) == set(
+    list(ACTION_SUBSETS.keys()) + ["custom"]
+)

BrowserGym/browsergym/core/src/browsergym/core/action/parsers.py ADDED Viewed

	@@ -0,0 +1,92 @@

+import ast
+import pyparsing as pp
+from dataclasses import dataclass
+from typing import Any
+@dataclass
+class NamedArgument:
+    name: str
+    value: Any
+    def __repr__(self):
+        return f"{self.name}={repr(self.value)}"
+def _build_highlevel_action_parser() -> pp.ParserElement:
+    """
+    Returns:
+        An action parser that accepts Python-like function calls with string, number, list or dict literals as arguments.
+        Example:
+            func("a", 42, None, True, [2, 4, "s"], {"a_key": "a_value"}, )
+        The parser is loose and accepts multi-line or single-line combinations af calls.
+        Example:
+            func() func()
+            \tfunc()
+        Python comments are ignored.
+        Example:
+            # this is a comment
+            func()    # this function call will be parsed
+            # func()  # this one will not
+        The parser will return a list of (function_name, function_args) tuples, one for each function call in the input.
+        The parser will raise exceptions
+    """
+    def make_keyword(kwd_str, kwd_value):
+        return pp.Keyword(kwd_str).set_parse_action(pp.replace_with(kwd_value))
+    TRUE = make_keyword("True", True)
+    FALSE = make_keyword("False", False)
+    NONE = make_keyword("None", None)
+    LBRACK, RBRACK, LBRACE, RBRACE, LPAREN, RPAREN, COLON = map(pp.Suppress, "[]{}():")
+    def literal_eval(toks):
+        return ast.literal_eval(toks[0])
+    string = pp.python_quoted_string().set_parse_action(literal_eval)
+    number = pp.pyparsing_common.number()
+    dict = pp.Forward().set_name("dict")  # will be defined later
+    list = pp.Forward().set_name("list")  # will be defined later
+    _tuple = pp.Forward().set_name("tuple")  # will be defined later
+    element = (string | number | dict | list | _tuple | TRUE | FALSE | NONE).set_name("element")
+    list_items = pp.DelimitedList(element, allow_trailing_delim=True).set_name(None)
+    list << pp.Group(LBRACK + pp.Optional(list_items) + RBRACK, aslist=True)
+    _tuple << pp.Group(LPAREN + pp.Optional(list_items) + RPAREN, aslist=True).set_parse_action(
+        lambda tokens: tuple(tokens[0])
+    )
+    dict_item = pp.Group(string + COLON + element, aslist=True).set_name("dict item")
+    dict_items = pp.DelimitedList(dict_item, allow_trailing_delim=True).set_name(None)
+    dict << pp.Dict(LBRACE + pp.Optional(dict_items) + RBRACE, asdict=True)
+    arg = element
+    list_args = pp.DelimitedList(arg, allow_trailing_delim=True).set_name(None)
+    named_arg = (pp.pyparsing_common.identifier() + pp.Literal("=") + element).set_parse_action(
+        lambda tokens: NamedArgument(name=tokens[0], value=tokens[2])
+    )
+    list_named_args = pp.DelimitedList(named_arg, allow_trailing_delim=True).set_name(None)
+    function_call = pp.pyparsing_common.identifier() + pp.Group(
+        LPAREN + pp.Optional(list_args) + pp.Optional(list_named_args) + RPAREN, aslist=True
+    )
+    multiple_function_calls = pp.DelimitedList(pp.Group(function_call), delim="")
+    multiple_function_calls.ignore(pp.python_style_comment())
+    parser = multiple_function_calls
+    return parser
+# this one will be used to extract python-like function calls
+highlevel_action_parser: pp.ParserElement = _build_highlevel_action_parser()
+# this one will be used to process the docstring in high-level actions, in order to describe the action space
+action_docstring_parser: pp.ParserElement = (
+    pp.Group(pp.OneOrMore(pp.Word(pp.printables), stop_on=pp.Literal("Examples:")))
+    + pp.Literal("Examples:").suppress()
+    + pp.Group(highlevel_action_parser)
+)

BrowserGym/browsergym/core/src/browsergym/core/action/python.py ADDED Viewed

	@@ -0,0 +1,112 @@

+import re
+from .base import AbstractActionSet
+class PythonActionSet(AbstractActionSet):
+    def describe(self, with_long_description: bool = True, with_examples: bool = True):
+        """
+        Returns a textual description of this action space.
+        """
+        description = f"""
+Each action consists of executable Python code (python>=3.10) that uses the Playwright library (playwright==1.32)
+to interact with the current webpage and the browser context. The currently active webpage is accessible via the
+global variable `page`. A function `send_message_to_user(text)` is also accessible and can be used to send a
+message to the user, as well as a function `report_infeasible_instructions(reason)` to notify the user when their
+instructions are infeasible."""
+        if with_long_description:
+            description += f"""
+The browser context is in `page.context`, and all open webpages (tabs and popups)
+are in `page.context.pages`. Here is is an example of a valid action:
+```
+frame = page.frame_locator(".result-frame")
+button = frame.get_by_text("Submit")
+button.click()
+```
+Here is another example:
+```
+frame = page.get_by_test_id("a").frame_locator(":scope")
+frame.get_by_test_id("a776").click()
+```
+Note that Playwright's `get_by_test_id()` method is configured to use the `bid` attribute to locate HTML elements,
+instead of the default `data-testid`. Also, Playwright's locators can not traverse iframes, so you have to locate
+parent iframes first in order to locate an element in an iframe. The `bid` attribute contains all the information
+required to recursively locate an element. For example, an element with `bid="ac2"` can be retrieved as follows:
+```
+frame = page.get_by_test_id("a").frame_locator(":scope")
+frame = frame.get_by_test_id("ac").frame_locator(":scope")
+elem = frame.get_by_test_id("ac2")
+```
+"""
+        else:
+            description += f"""\
+"""
+        if with_examples:
+            description += f"""\
+Here are other examples of valid actions:
+```
+page = page.context.new_page()
+page.goto("https://www.wikipedia.org/")
+```
+```
+page.get_by_label("Birth date").fill("2020-02-02")
+page.get_by_role("link", name="Get started").click()
+```
+```
+page.get_by_label('I agree to the terms above').check()
+```
+```
+page.locator('#area').fill('Hello World!')
+```
+```
+page.get_by_role("textbox").press("Control+ArrowRight")
+```
+```
+send_message_to_user("There are 7 items to choose from.")
+```
+```
+report_infeasible_instructions("I cannot follow these instructions because there is no email field in this form.")
+```
+"""
+        return description
+    def example_action(self, abstract: bool) -> str:
+        """
+        Returns an example action as a string.
+        """
+        if abstract:
+            return """\
+One single bloc of Python code. Do not include any explanation, only valid Python code."""
+        else:
+            return """\
+frame = page.get_by_test_id("b").frame_locator(":scope")
+frame = page.get_by_test_id("ba").frame_locator(":scope")
+frame.get_by_test_id("ba2").fill("Hello world!")
+frame.get_by_test_id("ba3").click()
+"""
+    def to_python_code(self, action):
+        """
+        Converts the given code action string to browsergym-compatible playwright code.
+        Args:
+            action: the code action to parse.
+        Returns:
+            Executable playwright code that performs the action in a browsergym environment.
+        """
+        python_code = ""
+        # extract markdown-style code snippets if detected
+        pattern = re.compile(r"```(?:python)?\n(?P<code>[\s\S]*?)```")
+        if pattern.match(action):
+            python_code += "\n".join([match.group("code") for match in pattern.finditer(action)])
+        # otherwise just use the code action as is
+        else:
+            python_code += action
+        # return the produced playwright code
+        return python_code

BrowserGym/browsergym/core/src/browsergym/core/action/utils.py ADDED Viewed

	@@ -0,0 +1,288 @@

+from typing import Literal
+import playwright.sync_api
+def get_elem_by_bid(
+    page: playwright.sync_api.Page, bid: str, scroll_into_view: bool = False
+) -> playwright.sync_api.Locator:
+    """
+    Parse the given bid to sequentially locate every nested frame leading to the bid, then
+    locate the bid element. Bids are expected to take the form "abDb123", which means
+    the element abDb123 is located inside frame abDAb, which is located inside frame abDA,
+    which is located inside frame a, which is located inside the page's main frame.
+    Args:
+        bid: the browsergym id (playwright testid) of the page element.
+        scroll_into_view: try to scroll element into view, unless it is completely visible.
+    Returns:
+        Playwright element.
+        Bounding box of the element.
+    """
+    if not isinstance(bid, str):
+        raise ValueError(f"expected a string, got {repr(bid)}")
+    current_frame = page
+    # dive into each nested frame, to the frame where the element is located
+    i = 0
+    while bid[i:] and not bid[i:].isnumeric():
+        i += 1
+        # allow multi-character frame ids such as aA, bCD etc.
+        while bid[i:] and bid[i].isalpha() and bid[i].isupper():
+            i += 1
+        frame_bid = bid[:i]  # bid of the next frame to select
+        frame_elem = current_frame.get_by_test_id(frame_bid)
+        if not frame_elem.count():
+            raise ValueError(f'Could not find element with bid "{bid}"')
+        if scroll_into_view:
+            frame_elem.scroll_into_view_if_needed(timeout=500)
+        current_frame = frame_elem.frame_locator(":scope")
+    # finally, we should have selected the frame where the target element is
+    elem = current_frame.get_by_test_id(bid)
+    if not elem.count():
+        raise ValueError(f'Could not find element with bid "{bid}"')
+    if scroll_into_view:
+        elem.scroll_into_view_if_needed(timeout=500)
+    return elem
+def highlight_by_box(
+    page: playwright.sync_api.Page, box: dict, color: Literal["blue", "red"] = "blue"
+):
+    """Highlights the target element based on its bounding box attributes."""
+    assert color in ("blue", "red")
+    if box:
+        left, top, width, height = box["x"], box["y"], box["width"], box["height"]
+        page.evaluate(
+            f"""\
+const overlay = document.createElement('div');
+document.body.appendChild(overlay);
+overlay.setAttribute('style', `
+    all: initial;
+    position: fixed;
+    border: 2px solid transparent;  /* Start with transparent border */
+    borderRadius: 10px;  /* Add rounded corners */
+    boxShadow: 0 0 0px {color};  /* Initial boxShadow with 0px spread */
+    left: {left - 2}px;  /* Adjust left position to accommodate initial shadow spread */
+    top: {top - 2}px;  /* Adjust top position likewise */
+    width: {width}px;
+    height: {height}px;
+    z-index: 2147483646; /* Maximum value - 1 */
+    pointerEvents: none; /* Ensure the overlay does not interfere with user interaction */
+`);
+// Animate the boxShadow to create a "wave" effect
+let spread = 0;  // Initial spread radius of the boxShadow
+const waveInterval = setInterval(() => {{
+    spread += 10;  // Increase the spread radius to simulate the wave moving outward
+    overlay.style.boxShadow = `0 0 40px ${{spread}}px {color}`;  // Update boxShadow to new spread radius
+    overlay.style.opacity = 1 - spread / 38;  // Gradually decrease opacity to fade out the wave
+    if (spread >= 38) {{  // Assuming 76px ~ 2cm spread radius
+        clearInterval(waveInterval);  // Stop the animation once the spread radius reaches 2cm
+        document.body.removeChild(overlay);  // Remove the overlay from the document
+    }}
+}}, 200);  // Adjust the interval as needed to control the speed of the wave animation
+"""
+        )
+        # Wait a bit to let users see the highlight
+        page.wait_for_timeout(1000)  # Adjust delay as needed
+def smooth_move_visual_cursor_to(
+    page: playwright.sync_api.Page, x: float, y: float, speed: float = 400
+):
+    """
+    Smoothly moves the visual cursor to a specific point, with constant
+    movement speed.
+    Args:
+        x: target location X coordinate (in viewport pixels)
+        y: target location Y coordinate (in viewport pixels)
+        speed: cursor speed (in pixels per second)
+    """
+    movement_time = page.evaluate(
+        """\
+    ([targetX, targetY, speed]) => {
+        // create cursor if needed
+        if (!("browsergym_visual_cursor" in window)) {
+            if (window.trustedTypes && window.trustedTypes.createPolicy) {
+                window.trustedTypes.createPolicy('default', {
+                    createHTML: (string, sink) => string
+                });
+            }
+            let cursor = document.createElement('div');
+            cursor.setAttribute('id', 'browsergym-visual-cursor');
+            cursor.innerHTML = `
+                <svg width="50px" height="50px" viewBox="213 106 713 706" fill="none" xmlns="http://www.w3.org/2000/svg">
+                <path d="M213.333 106.667L426.667 853.333 512 512 853.333 426.667 213.333 106.667z" fill="blue"/>
+                </svg>
+`;
+            cursor.setAttribute('style', `
+                all: initial;
+                position: fixed;
+                opacity: 0.7; /* Slightly transparent */
+                z-index: 2147483647; /* Maximum value */
+                pointer-events: none; /* Ensures the SVG doesn't interfere with page interactions */
+            `);
+            // Calculate center position within the viewport
+            const centerX = window.innerWidth / 2;
+            const centerY = window.innerHeight / 2;
+            cursor.style.left = `${centerX}px`;
+            cursor.style.top = `${centerY}px`;
+            // save cursor element
+            window.browsergym_visual_cursor = cursor;
+            window.browsergym_visual_cursor_n_owners = 0;
+        }
+        // recover cursor
+        let cursor = window.browsergym_visual_cursor;
+        // attach cursor to document
+        document.body.appendChild(cursor);
+        window.browsergym_visual_cursor_n_owners += 1;
+        x = parseFloat(cursor.style.left);
+        y = parseFloat(cursor.style.top);
+        dx = targetX - x;
+        dy = targetY - y;
+        dist = Math.hypot(dx, dy);
+        movement_time = (dist / speed) * 1000;  // seconds to milliseconds
+        still_wait_time = 1000;
+        // Adjust steps based on distance to keep movement speed consistent
+        // 1 step per 10 pixels of distance, adjust as needed
+        steps = Math.max(1, Math.trunc(dist / 10));
+        step_dx = dx / steps;
+        step_dy = dy / steps;
+        step_dist = dist / steps;
+        step_wait_time = Math.max(10, movement_time / steps);
+        let step = 0;
+        let time_still = 0;
+        const cursorInterval = setInterval(() => {
+            // move cursor
+            if (step < steps) {
+                x += step_dx;
+                y += step_dy;
+                cursor.style.left = `${x}px`;
+                cursor.style.top = `${y}px`;
+            }
+            // still cursor (wait a bit)
+            else if (time_still < still_wait_time) {
+                time_still += step_wait_time;
+            }
+            // stop and detach cursor
+            else {
+                clearInterval(cursorInterval);
+                window.browsergym_visual_cursor_n_owners -= 1;
+                if (window.browsergym_visual_cursor_n_owners <= 0) {
+                    document.body.removeChild(cursor);
+                }
+            }
+            step += 1;
+        }, step_wait_time);
+        return movement_time;
+    }""",
+        [x, y, speed],
+    )
+    page.wait_for_timeout(movement_time)
+def check_for_overlay(
+    page: playwright.sync_api.Page, bid: str, element: playwright.sync_api.ElementHandle, box: dict
+):
+    if not element:
+        return False
+    visibility = element.get_attribute("browsergym_visibility_ratio")
+    if visibility is not None:
+        return float(visibility) >= 0.5
+    """Checks if a given element is the topmost element at its center position by default.
+    If check_corners is True, it checks if any of the corners is visible."""
+    if box:
+        # corners
+        points_to_check = [
+            (box["x"], box["y"]),
+            (box["x"] + box["width"], box["y"]),
+            (box["x"], box["y"] + box["height"]),
+            (box["x"] + box["width"], box["y"] + box["height"]),
+        ]
+        for x, y in points_to_check:
+            # Execute JavaScript to find the topmost element at the point.
+            top_element = page.evaluate(
+                f"""() => {{
+                const el = document.elementFromPoint({x}, {y});
+                return el ? el.outerHTML : '';
+            }}"""
+            )
+            # Check if the topmost element is the element we're interested in.
+            if top_element and bid in top_element:
+                return True
+    return False
+def add_demo_mode_effects(
+    page: playwright.sync_api.Page,
+    elem: playwright.sync_api.ElementHandle,
+    bid: str,
+    demo_mode: Literal["off", "default", "all_blue", "only_visible_elements"],
+    move_cursor: bool = True,
+    highlight_box: bool = True,
+):
+    if demo_mode == "off":
+        return
+    """Adds visual effects to the target element"""
+    box = elem.bounding_box()
+    # box = extract_bounds_cdp(page, bid)
+    if box:
+        center_x, center_y = box["x"] + box["width"] / 2, box["y"] + box["height"] / 2
+        is_top_element = check_for_overlay(page, bid, elem, box)
+        if demo_mode == "only_visible_elements":
+            if not is_top_element:
+                return
+            else:
+                color = "blue"
+        elif demo_mode == "default":
+            if is_top_element:
+                color = "blue"
+            else:
+                color = "red"
+        elif demo_mode == "all_blue":
+            color = "blue"
+        if move_cursor:
+            smooth_move_visual_cursor_to(page, center_x, center_y)
+        if highlight_box:
+            highlight_by_box(page, box, color=color)
+def call_fun(fun: callable, retry_with_force: bool):
+    try:
+        fun(force=False)
+    except playwright.sync_api.TimeoutError as e:
+        if retry_with_force:
+            fun(force=True)
+        else:
+            raise e

BrowserGym/browsergym/core/src/browsergym/core/chat.py ADDED Viewed

	@@ -0,0 +1,95 @@

+import base64
+from pathlib import Path
+from typing import Literal
+import logging
+import playwright.sync_api
+import re
+import time
+from importlib import resources
+from . import _get_global_playwright, chat_files
+CHATBOX_DIR = resources.files(chat_files)
+logger = logging.getLogger(__name__)
+class Chat:
+    def __init__(
+        self, headless: bool, chat_size=(500, 800), record_video_dir=None, modern=True
+    ) -> None:
+        self.messages = []
+        # create a new browser, browser context and page for the chat
+        pw: playwright.sync_api.Playwright = _get_global_playwright()
+        self.browser = pw.chromium.launch(
+            headless=headless, args=[f"--window-size={chat_size[0]},{chat_size[1]}"]
+        )
+        self.context = self.browser.new_context(
+            no_viewport=True,
+            record_video_dir=Path(record_video_dir) / "chat_video" if record_video_dir else None,
+            record_video_size=dict(width=chat_size[0], height=chat_size[1]),
+        )
+        self.page = self.context.new_page()
+        self.recording_start_time = time.time() if record_video_dir else None
+        # setup the chat page
+        self.page.expose_function(
+            "send_user_message", lambda msg: self._js_user_message_received_callback(msg=msg)
+        )
+        if modern:
+            self.page.set_content(get_chatbox_modern(CHATBOX_DIR))
+        else:
+            self.page.set_content(get_chatbox_classic(CHATBOX_DIR))
+    def _js_user_message_received_callback(self, msg: str):
+        """Callback function for when a user message is received in the chatbox"""
+        utc_time = time.time()
+        self.messages.append({"role": "user", "timestamp": utc_time, "message": msg})
+        # returning a list as JS doesnt like tuples
+        return ["user", time.strftime("%H:%M", time.localtime(utc_time)), msg]
+    def add_message(
+        self, role: Literal["user", "user_image", "assistant", "info", "infeasible"], msg: str
+    ):
+        """Add a message to the chatbox and update the page accordingly."""
+        utc_time = time.time()
+        if role not in ("user", "user_image", "assistant", "info", "infeasible"):
+            raise ValueError(f"Invalid role: {role}")
+        if role in ("user", "user_image", "assistant", "infeasible"):
+            self.messages.append({"role": role, "timestamp": utc_time, "message": msg})
+        timestamp = time.strftime("%H:%M:%S", time.localtime(utc_time))
+        self.page.evaluate(f"addChatMessage({repr(role)}, {repr(timestamp)}, {repr(msg)});")
+    def wait_for_user_message(self):
+        logger.info("Waiting for message from user...")
+        # reset flag
+        self.page.evaluate("USER_MESSAGE_RECEIVED = false;")
+        # wait for flag to be raised
+        self.page.wait_for_function("USER_MESSAGE_RECEIVED", polling=100, timeout=0)
+        logger.info("Message received.")
+    def close(self):
+        self.context.close()
+        self.browser.close()
+def get_chatbox_modern(chatbox_dir) -> str:
+    with open(chatbox_dir / "chatbox_modern.html", "r") as file:
+        chatbox_html = file.read()
+    return chatbox_html
+def get_chatbox_classic(chatbox_dir) -> str:
+    with open(chatbox_dir / "chatbox.html", "r") as file:
+        chatbox_html = file.read()
+    with open(chatbox_dir / "assistant.png", "rb") as f:
+        image_base64 = base64.b64encode(f.read()).decode("utf-8")
+    assistant_image_url = f"data:image/png;base64,{image_base64}"
+    chatbox_html = re.sub("<ASSISTANT_IMAGE_URL>", assistant_image_url, chatbox_html)
+    return chatbox_html

BrowserGym/browsergym/core/src/browsergym/core/chat_files/chatbox.html ADDED Viewed

	@@ -0,0 +1,243 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>UI Assistant Chat</title>
+    <style>
+        .chat-container {
+            display: flex;
+            flex-flow: column;
+            position: fixed;
+            bottom: 0;
+            right: 0;
+            height: 100%;
+            width: 100%;
+            border: 1px solid black;
+            background-color: white;
+            padding: 0;
+            overflow: hidden;
+            box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
+            font-family: 'Source Sans Pro', Arial, Helvetica, sans-serif;
+        }
+        .chat-header {
+            background-color: #032D42;
+            color: white;
+            padding: 5px;
+            padding-left: 15px;
+            text-align: center;
+            flex: 0 1 auto;
+        }
+        .chat-body {
+            padding: 10px;
+            overflow-y: auto;
+            display: flex;
+            flex-direction: column;
+            flex: 1 1 auto;
+        }
+        .chat-debug {
+            padding: 10px;
+            max-height: 30%;
+            overflow-y: auto;
+            display: flex;
+            flex-direction: column;
+            flex: 0 0 auto;
+        }
+        .chat-input-area {
+            display: flex;
+            flex-flow: row;
+            margin-top: 5px;
+            padding: 10px;
+            border-top: 1px solid #ddd;
+            flex: 0 1 50px;
+        }
+        .chat-input-area form {
+            display: flex;
+            width: 100%;
+            height: 100%;
+        }
+        .input-box {
+            padding: 5px;
+            margin-right: 10px;
+            border-radius: 5px;
+            border: 1px solid #ccc;
+            width: 100%;
+        }
+        .submit-button {
+            padding: 5px 10px;
+            border-radius: 5px;
+            background-color: #4CAF50;
+            color: white;
+            border: none;
+            align-self: center;
+        }
+        .message {
+            display: flex;
+            align-items: center;
+            margin: 0px;
+            padding: 0px;
+        }
+        .message p {
+            padding: 10px;
+            /* Added padding inside the bubble */
+            border-radius: 15px;
+            flex-grow: 1;
+            /* a unit is required for non-zero lengths, otherwise the declaration is ignored */
+            margin-top: 10px;
+            margin-bottom: 0;
+        }
+        .chat-debug .message p {
+            padding: 0;
+            border-radius: 0;
+            flex-grow: 1;
+            margin-top: 0;
+            margin-bottom: 0;
+        }
+        .user-message {
+            background-color: #d1f4d1;
+        }
+        .assistant-message {
+            background-color: #e0e0e0;
+        }
+        .info-message {
+            background-color: #f0f0f0;
+            color: #707070;
+            font-size: 13px;
+        }
+        .assistant-image {
+            margin: 0px;
+            padding: 10px;
+            width: 40px;
+        }
+    </style>
+</head>
+<body>
+    <div class="chat-container">
+        <div class="chat-header">
+            <h2>BrowserGym</h2>
+        </div>
+        <div class="chat-body" id="chatBody"></div>
+        <div class="chat-debug" id="chatDebug"></div>
+        <div class="chat-input-area">
+            <form id="chatForm">
+                <textarea class="input-box" rows="2" id="inputBox"></textarea>
+                <input type="submit" class="submit-button" value="Send">
+            </form>
+        </div>
+    </div>
+    <script>
+        const assistant_image_data = "<ASSISTANT_IMAGE_URL>";
+        var USER_MESSAGE_RECEIVED = false;
+        function escapeHtml(unsafe) {
+            return unsafe
+                .replace(/&/g, "&amp;")
+                .replace(/</g, "&lt;")
+                .replace(/>/g, "&gt;")
+                .replace(/"/g, "&quot;")
+                .replace(/'/g, "&#039;");
+        }
+        function addChatMessage(role, msg) {
+            const chatBody = document.getElementById('chatBody');
+            const chatDebug = document.getElementById('chatDebug');
+            const msgContainer = document.createElement('div');
+            msgContainer.className = 'message';
+            const text = document.createElement('p');
+            text.innerHTML = escapeHtml(msg);
+            const assistant_img = document.createElement('img');
+            assistant_img.src = assistant_image_data;
+            assistant_img.alt = 'Assistant';
+            assistant_img.className = 'assistant-image';
+            switch (role) {
+                case "user":
+                    text.className = 'user-message';
+                    msgContainer.appendChild(text);
+                    chatBody.appendChild(msgContainer);
+                    break;
+                case "assistant":
+                    text.className = 'assistant-message';
+                    msgContainer.appendChild(assistant_img); // Add the image to the message container
+                    msgContainer.appendChild(text);
+                    chatBody.appendChild(msgContainer);
+                    break;
+                case "info":
+                    text.className = 'info-message';
+                    text.innerHTML = msg;
+                    msgContainer.appendChild(text);
+                    // hide previous debug messages
+                    for (const msg of chatDebug.children) {
+                        msg.style.display = 'none';
+                    }
+                    chatDebug.appendChild(msgContainer);
+                    break;
+                default:
+                    throw new TypeError(`Illegal role "${role}".`);
+            }
+            chatBody.scrollTop = chatBody.scrollHeight;
+            if (role === "user") {
+                USER_MESSAGE_RECEIVED = true;
+            }
+        }
+        if (typeof send_user_message !== 'function') {
+            function send_user_message(msg) {
+                // This will be overloaded by playwright
+            }
+        }
+        const inputBox = document.getElementById('inputBox');
+        function send_msg(msg) {
+            if (msg.trim()) {
+                send_user_message(msg);
+                addChatMessage('user', msg);
+                inputBox.value = '';
+            }
+        }
+        inputBox.onkeypress = (e) => {
+            if (e.key === 'Enter' && !e.shiftKey) {
+                e.preventDefault();
+                send_msg(inputBox.value);
+            }
+        };
+        document.getElementById('chatForm').onsubmit = function (event) {
+            event.preventDefault();
+            send_msg(inputBox.value);
+            return false;
+        }
+    </script>
+</body>
+</html>